diff --git a/ascon/Implementations/.clang-format b/ascon/Implementations/.clang-format new file mode 100644 index 0000000..f2dd0de --- /dev/null +++ b/ascon/Implementations/.clang-format @@ -0,0 +1,168 @@ +--- +Language: Cpp +# BasedOnStyle: Google +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignConsecutiveMacros: false +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Left +AlignOperands: true +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: true +AllowAllConstructorInitializersOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortLambdasOnASingleLine: All +AllowShortIfStatementsOnASingleLine: WithoutElse +AllowShortLoopsOnASingleLine: true +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false +BreakInheritanceList: BeforeColon +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 
+Cpp11BracedListStyle: true +DeriveLineEnding: true +DerivePointerAlignment: true +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeBlocks: Regroup +IncludeCategories: + - Regex: '^<ext/.*\.h>' + Priority: 2 + SortPriority: 0 + - Regex: '^<.*\.h>' + Priority: 1 + SortPriority: 0 + - Regex: '^<.*' + Priority: 2 + SortPriority: 0 + - Regex: '.*' + Priority: 3 + SortPriority: 0 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IncludeIsMainSourceRegex: '' +IndentCaseLabels: true +IndentGotoLabels: true +IndentPPDirectives: None +IndentWidth: 2 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBinPackProtocolList: Never +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: true +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Left +RawStringFormats: + - Language: Cpp + Delimiters: + - cc + - CC + - cpp + - Cpp + - CPP + - 'c++' + - 'C++' + CanonicalDelimiter: '' + BasedOnStyle: google + - Language: TextProto + Delimiters: + - pb + - PB + - proto + - PROTO + EnclosingFunctions: + - EqualsProto + - EquivToProto + - PARSE_PARTIAL_TEXT_PROTO + - PARSE_TEST_PROTO + - PARSE_TEXT_PROTO + - ParseTextOrDie + - ParseTextProtoOrDie + CanonicalDelimiter: '' + BasedOnStyle: google +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true 
+SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +SpaceBeforeSquareBrackets: false +Standard: Auto +StatementMacros: + - Q_UNUSED + - QT_REQUIRE_VERSION +TabWidth: 8 +UseCRLF: false +UseTab: Never +... + diff --git a/ascon/Implementations/CMakeLists.txt b/ascon/Implementations/CMakeLists.txt new file mode 100644 index 0000000..08bbf6b --- /dev/null +++ b/ascon/Implementations/CMakeLists.txt @@ -0,0 +1,91 @@ +cmake_minimum_required(VERSION 3.6) +project(ascon LANGUAGES C ASM) +enable_testing() + +# set the default version, algorithms, implementations, tests, flags, defs +set(DEFAULT_VERSIONS v12) +set(DEFAULT_ALGS ascon128 ascon128a ascon80pq asconhash asconxof) +set(DEFAULT_IMPLS ref opt64 opt64_lowsize bi32 bi32_lowsize bi32_lowreg bi16 bi8 + opt64_2shares opt64_3shares bi32_2shares bi32_3shares) +set(DEFAULT_TESTS genkat getcycles) +set(DEFAULT_REL_FLAGS -std=c99 -O2 -fomit-frame-pointer -march=native -mtune=native) +set(DEFAULT_DBG_FLAGS -std=c99 -O2 -Wall -Wextra -Wshadow) +set(DEFAULT_COMPILE_DEFS -DASCON_DATA_ACCESS='H') + +# set cmake variables for version, algorithms, implementations, tests, flags, defs +set(VERSION_LIST ${DEFAULT_VERSIONS} CACHE STRING "Choose the ascon versions to include.") +set(ALG_LIST ${DEFAULT_ALGS} CACHE STRING "Choose the list of algorithms to include.") +set(IMPL_LIST ${DEFAULT_IMPLS} CACHE STRING "Choose the list of implementations to include.") +set(TEST_LIST ${DEFAULT_TESTS} CACHE STRING "Choose the list of tests to include.") +set(REL_FLAGS ${DEFAULT_REL_FLAGS} CACHE STRING "Define custom Release (performance) flags.") +set(DBG_FLAGS ${DEFAULT_DBG_FLAGS} CACHE STRING "Define custom Debug 
(NIST) flags.") +set(COMPILE_DEFS ${DEFAULT_COMPILE_DEFS} CACHE STRING "Define custom compile definitions.") + +if(NOT WIN32 AND NOT CYGWIN AND NOT MSYS) + # use sanitizer in Debug build (but not on windows) + set(DBG_FLAGS ${DBG_FLAGS} -fsanitize=address,undefined -static-libasan) +endif() +if(MSVC) + set(DBG_FLAGS /Od) +endif() + +# set the default build type for single-config generators if none was specified +if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) + message(STATUS "Setting build type to 'Release' as none was specified.") + set(CMAKE_BUILD_TYPE Release CACHE STRING + "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE) +endif() + +# add platform specific implementations +message(STATUS "cmake host system name: ${CMAKE_HOST_SYSTEM_NAME}") +message(STATUS "cmake host system processor: ${CMAKE_HOST_SYSTEM_PROCESSOR}") +if(${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL x86_64 OR ${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL AMD64) + set(DEFAULT_IMPLS ${DEFAULT_IMPLS}) +elseif(${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES ARM OR ${CMAKE_HOST_SYSTEM_PROCESSOR} MATCHES arm) + set(DEFAULT_IMPLS ${DEFAULT_IMPLS} bi32_arm neon) +endif() + +set(KAT_PATH KAT) +set(TEST_PATH tests) +foreach(CRYPTO aead hash) + foreach(VER ${VERSION_LIST}) + foreach(ALG ${ALG_LIST}) + foreach(IMPL ${IMPL_LIST}) + set(IMPL_PATH crypto_${CRYPTO}/${ALG}${VER}/${IMPL}) + if(NOT EXISTS ${CMAKE_SOURCE_DIR}/${IMPL_PATH}) + continue() + endif() + message("Adding implementation ${IMPL_PATH}") + string(REGEX REPLACE "/" "_" IMPL_NAME ${IMPL_PATH}) + file(GLOB IMPL_FILES RELATIVE ${CMAKE_SOURCE_DIR} "${IMPL_PATH}/*.[ch]") + add_library(${IMPL_NAME} ${IMPL_FILES}) + target_include_directories(${IMPL_NAME} PUBLIC ${IMPL_PATH} ${TEST_PATH}) + target_compile_definitions(${IMPL_NAME} PRIVATE ${COMPILE_DEFS}) + #target_compile_features(${IMPL_NAME} PUBLIC c_std_99) # cmake >= 3.8.2 + target_compile_options(${IMPL_NAME} PUBLIC $<$<CONFIG:RELEASE>:${REL_FLAGS}>) + 
target_compile_options(${IMPL_NAME} PUBLIC $<$<CONFIG:DEBUG>:${DBG_FLAGS}>) + foreach(TEST_NAME ${TEST_LIST}) + if(${TEST_NAME} STREQUAL genkat) + set(TEST_FILES ${TEST_PATH}/crypto_${CRYPTO}.h ${TEST_PATH}/${TEST_NAME}_${CRYPTO}.c) + else() + set(TEST_FILES ${TEST_PATH}/crypto_${CRYPTO}.h ${TEST_PATH}/${TEST_NAME}.c) + endif() + string(TOUPPER CRYPTO_${CRYPTO} CRYPTO_DEFINE) + set(EXE_NAME ${TEST_NAME}_${IMPL_NAME}) + add_executable(${EXE_NAME} ${TEST_FILES}) + target_compile_definitions(${EXE_NAME} PRIVATE ${CRYPTO_DEFINE}) + target_link_libraries(${EXE_NAME} PRIVATE ${IMPL_NAME}) + if(${TEST_NAME} STREQUAL genkat) + add_test(NAME ${EXE_NAME} COMMAND ${CMAKE_COMMAND} + -DEXE_NAME=${EXE_NAME} -DALG=${ALG}${VER} + -DSRC_DIR=${CMAKE_SOURCE_DIR} -DBIN_DIR=${CMAKE_BINARY_DIR} + -DCONFIG=$<CONFIG> -P ${CMAKE_SOURCE_DIR}/genkat.cmake) + else() + add_test(${EXE_NAME} ${EXE_NAME}) + endif() + endforeach() + endforeach() + endforeach() + endforeach() +endforeach() + diff --git a/ascon/Implementations/LICENSE b/ascon/Implementations/LICENSE new file mode 100644 index 0000000..3bbbc1e --- /dev/null +++ b/ascon/Implementations/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. 
These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. + +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> \ No newline at end of file diff --git a/ascon/Implementations/README.md b/ascon/Implementations/README.md new file mode 100644 index 0000000..a8056c1 --- /dev/null +++ b/ascon/Implementations/README.md @@ -0,0 +1,194 @@ +# Reference and optimized C and ASM implementations of Ascon + +Ascon is a family of lightweight authenticated encryption schemes with associated data (AEAD), including a hash and extendible output function (XOF). 
+ +For more information on Ascon visit: https://ascon.iaik.tugraz.at/ + +This repository contains the following 5 Ascon algorithms: + +- `crypto_aead/ascon128v12`: Ascon-128 v1.2 +- `crypto_aead/ascon128av12`: Ascon-128a v1.2 +- `crypto_aead/ascon80pqv12`: Ascon-80pq v1.2 +- `crypto_hash/asconhashv12`: Ascon-Hash v1.2 +- `crypto_hash/asconxofv12`: Ascon-Xof v1.2 + +and the following implementations: + +- `ref`: reference implementation +- `opt64`: 64-bit speed-optimized C implementation +- `opt64_lowsize`: 64-bit size-optimized C implementation +- `neon`: NEON speed-optimized ARM inline assembly implementation +- `bi32`: 32-bit speed-optimized bit-interleaved C implementation +- `bi32_lowsize`: 32-bit size-optimized bit-interleaved C implementation +- `bi32_lowreg`: 32-bit speed-optimized bit-interleaved C implementation (low register usage) +- `bi32_arm`: 32-bit speed-optimized bit-interleaved ARM inline assembly implementation +- `bi16`: 16-bit optimized bit-interleaved C implementation +- `bi8`: 8-bit optimized bit-interleaved C implementation + + +## Performance results of Ascon-128 on different CPUs in cycles per byte: + +| Message Length in Bytes: | 1 | 8 | 16 | 32 | 64 | 1536 | long | +|:-------------------------|-----:|-----:|-----:|-----:|-----:|-----:|-----:| +| AMD Ryzen 7 1700\* | | | | | 14.5 | 8.8 | 8.6 | +| Intel Xeon E5-2609 v4\* | | | | | 17.3 | 10.8 | 10.5 | +| Cortex-A53 (ARMv8)\* | | | | | 18.3 | 11.3 | 11.0 | +| Intel Core i5-6300U | 367 | 58 | 35 | 23 | 17.6 | 11.9 | 11.4 | +| Intel Core i5-4200U | 521 | 81 | 49 | 32 | 23.9 | 16.2 | 15.8 | +| Cortex-A15 (ARMv7)\* | | | | | 69.8 | 36.2 | 34.6 | +| Cortex-A7 (NEON) | 2182 | 249 | 148 | 97 | 71.7 | 47.5 | 46.5 | +| Cortex-A7 (ARMv7) | 1871 | 292 | 175 | 115 | 86.6 | 58.3 | 57.2 | +| ARM1176JZF-S (ARMv6) | 2189 | 340 | 202 | 133 | 97.9 | 64.4 | 65.3 | + +\* Results taken from eBACS: http://bench.cr.yp.to/ + + +## Performance results of Ascon-128a on different CPUs in cycles per byte: + +| Message 
Length in Bytes: | 1 | 8 | 16 | 32 | 64 | 1536 | long | +|:-------------------------|-----:|-----:|-----:|-----:|-----:|-----:|-----:| +| AMD Ryzen 7 1700\* | | | | | 12.0 | 6.0 | 5.7 | +| Intel Xeon E5-2609 v4\* | | | | | 14.1 | 7.3 | 6.9 | +| Cortex-A53 (ARMv8)\* | | | | | 15.1 | 7.6 | 7.3 | +| Intel Core i5-6300U | 365 | 47 | 31 | 19 | 13.5 | 8.0 | 7.8 | +| Intel Core i5-4200U | 519 | 67 | 44 | 27 | 18.8 | 11.0 | 10.6 | +| Cortex-A15 (ARMv7)\* | | | | | 60.3 | 25.3 | 23.8 | +| Cortex-A7 (NEON) | 2204 | 226 | 132 | 82 | 55.9 | 31.7 | 30.7 | +| Cortex-A7 (ARMv7) | 1911 | 255 | 161 | 102 | 71.3 | 42.3 | 41.2 | +| ARM1176JZF-S (ARMv6) | 2267 | 303 | 191 | 120 | 84.4 | 50.0 | 50.2 | + +\* Results taken from eBACS: http://bench.cr.yp.to/ + + +## Implementation interface + +All implementations use the interface defined by the ECRYPT Benchmarking of Cryptographic Systems (eBACS): + +- https://bench.cr.yp.to/call-aead.html for CRYPTO\_AEAD (Ascon-128, Ascon-128a, Ascon-80pq) +- https://bench.cr.yp.to/call-hash.html for CRYPTO\_HASH (Ascon-Hash) and XOF (Ascon-Xof) + + +## Manually build and run a single Ascon target: + +Build example for CRYPTO\_AEAD algorithms: + +``` +gcc -march=native -O3 -DNDEBUG -Icrypto_aead/ascon128v12/opt64 crypto_aead/ascon128v12/opt64/*.c -Itests tests/genkat_aead.c -o genkat +gcc -march=native -O3 -DNDEBUG -Icrypto_aead/ascon128v12/opt64 crypto_aead/ascon128v12/opt64/*.c -DCRYPTO_AEAD -Itests tests/getcycles.c -o getcycles +``` + +Build example for CRYPTO\_HASH algorithms: + +``` +gcc -march=native -O3 -DNDEBUG -Icrypto_hash/asconhashv12/opt64 crypto_hash/asconhashv12/opt64/*.c -Itests tests/genkat_hash.c -o genkat +gcc -march=native -O3 -DNDEBUG -Icrypto_hash/asconhashv12/opt64 crypto_hash/asconhashv12/opt64/*.c -DCRYPTO_HASH -Itests tests/getcycles.c -o getcycles +``` + +Generate KATs and get CPU cycles: + +``` +./genkat +./getcycles +``` + + +## Build and test all Ascon v1.2 targets using performance flags: + +``` +mkdir build && cd build 
+cmake .. -DCMAKE_BUILD_TYPE=Release +cmake --build . +ctest +``` + + +## Build and test all Ascon v1.2 targets using NIST flags and sanitizers: + +``` +mkdir build && cd build +cmake .. -DCMAKE_BUILD_TYPE=Debug +cmake --build . +ctest +``` + + +## Build and run only specific algorithms, implementations and tests: + +Build and test: + +``` +mkdir build && cd build +cmake .. -DVERSION_LIST="v12" -DALG_LIST="ascon128;asconhash" -DIMPL_LIST="opt64;bi32" -DTEST_LIST="genkat;getcycles" +cmake --build . +ctest -R genkat +``` + +Get CPU cycles: + +``` +./getcycles_crypto_aead_ascon128v12_opt64 +./getcycles_crypto_aead_ascon128v12_bi32 +./getcycles_crypto_hash_asconhashv12_opt64 +./getcycles_crypto_hash_asconhashv12_bi32 +``` + + +## Hints to get more reliable getcycles results on Intel/AMD CPUs: + +* Determine the processor base frequency (also called design frequency): + - e.g. using the Intel/AMD website + - or using `lscpu` listed under model name + +* Disable turbo boost (this should lock the frequency to the next value + below the processor base frequency): + ``` + echo 1 | sudo tee /sys/devices/system/cpu/intel_pstate/no_turbo + ``` + +* If the above does not work, manually set the frequency using e.g. `cpufreq-set`. + +* Determine the actual frequency (under load): + - e.g. by watching the frequency using `lscpu` or `cpufreq-info` + +* Determine the scaling factor between the actual and base frequency: + - factor = actual frequency / base frequency + +* Run the getcycles program using the frequency factor and watch the results: + ``` + while true; do ./getcycles_crypto_aead_ascon128v12_opt64 $factor; done + ``` + + +## Hints to activate the performance monitor unit (PMU) on ARM CPUs: + +* First try to install `linux-tools` and see if it works. 
+ +* On many ARM platforms, the PMU has to be enabled using a kernel module: + - Source code for Armv6 (32-bit): + + - Source code for Armv7 (32-bit): + + - Source code for Armv8/Aarch64 (64-bit): + + +* Steps to compile the kernel module on the raspberry pi: + - Find out the kernel version using `uname -a` + - Download the kernel header files, e.g. `raspberrypi-kernel-header` + - Download the source code for the Armv6 kernel module + - Build, install and load the kernel module + + +## Benchmark Ascon v1.2 using supercop + +Download supercop according to the website: http://bench.cr.yp.to/supercop.html + +To test only Ascon, just run the following commands: + +``` +./do-part init +./do-part crypto_aead ascon128v12 +./do-part crypto_aead ascon128av12 +./do-part crypto_aead ascon80pqv12 +./do-part crypto_hash asconhashv12 +./do-part crypto_hash asconxofv12 +``` diff --git a/ascon/Implementations/benchmark.sh b/ascon/Implementations/benchmark.sh new file mode 100644 index 0000000..a348640 --- /dev/null +++ b/ascon/Implementations/benchmark.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +FACTOR=$1 + +for i in getcycles*; do + echo + echo $i: + echo + echo "| 1 | 8 | 16 | 32 | 64 | 1536 | long |" + echo "|------:|------:|------:|------:|------:|------:|------:|" + for n in $(seq 5); do + ./$i $FACTOR | tail -n 1 + done | sort -n -k8 -t'|' +done 2>/dev/null +echo diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi16/api.h b/ascon/Implementations/crypto_aead/ascon128av12/bi16/api.h index a4aa567..951ee9c 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi16/api.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi16/api.h @@ -3,3 +3,4 @@ #define CRYPTO_NPUBBYTES 16 #define CRYPTO_ABYTES 16 #define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 16 diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi16/ascon.c b/ascon/Implementations/crypto_aead/ascon128av12/bi16/ascon.c index b910571..d53881b 100644 --- 
a/ascon/Implementations/crypto_aead/ascon128av12/bi16/ascon.c +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi16/ascon.c @@ -1,19 +1,13 @@ +#include <stdint.h> + #include "api.h" #include "endian.h" -typedef unsigned char u8; -typedef unsigned short u16; -typedef unsigned int u32; -typedef unsigned long long u64; - -#define RATE (128 / 8) #define PA_ROUNDS 12 #define PB_ROUNDS 8 -#define EXT_BYTE16(x, n) ((u8)((u16)(x) >> (8 * (1 - (n))))) -#define INS_BYTE16(x, n) ((u16)(x) << (8 * (1 - (n)))) -#define ROTR16(x, n) (((x) >> (n)) | ((x) << (16 - (n)))) -#define ROTL16(x, n) (((x) << (n)) | ((x) >> (16 - (n)))) +#define ROR16(x, n) (((x) >> (n)) | ((x) << (16 - (n)))) +#define ROL16(x, n) (((x) << (n)) | ((x) >> (16 - (n)))) #define COMPRESS_LONG_16(x) \ do { \ @@ -35,24 +29,24 @@ typedef unsigned long long u64; COMPRESS_LONG_16(var_3); \ } while (0) -#define COMPRESS_BYTE_ARRAY_16(a, var_3, var_2, var_1, var_0) \ - do { \ - COMPRESS_U16(U16BIG(((u16 *)(a))[3]), var_3, var_2, var_1, var_0); \ - COMPRESS_U16(U16BIG(((u16 *)(a))[2]), t1_3, t1_2, t1_1, t1_0); \ - var_0 |= t1_0 << 4; \ - var_1 |= t1_1 << 4; \ - var_2 |= t1_2 << 4; \ - var_3 |= t1_3 << 4; \ - COMPRESS_U16(U16BIG(((u16 *)(a))[1]), t1_3, t1_2, t1_1, t1_0); \ - var_0 |= t1_0 << 8; \ - var_1 |= t1_1 << 8; \ - var_2 |= t1_2 << 8; \ - var_3 |= t1_3 << 8; \ - COMPRESS_U16(U16BIG(((u16 *)(a))[0]), t1_3, t1_2, t1_1, t1_0); \ - var_0 |= t1_0 << 12; \ - var_1 |= t1_1 << 12; \ - var_2 |= t1_2 << 12; \ - var_3 |= t1_3 << 12; \ +#define COMPRESS_BYTE_ARRAY_16(a, var_3, var_2, var_1, var_0) \ + do { \ + COMPRESS_U16(U16BIG(((uint16_t*)(a))[3]), var_3, var_2, var_1, var_0); \ + COMPRESS_U16(U16BIG(((uint16_t*)(a))[2]), t1_3, t1_2, t1_1, t1_0); \ + var_0 |= t1_0 << 4; \ + var_1 |= t1_1 << 4; \ + var_2 |= t1_2 << 4; \ + var_3 |= t1_3 << 4; \ + COMPRESS_U16(U16BIG(((uint16_t*)(a))[1]), t1_3, t1_2, t1_1, t1_0); \ + var_0 |= t1_0 << 8; \ + var_1 |= t1_1 << 8; \ + var_2 |= t1_2 << 8; \ + var_3 |= t1_3 << 8; \ + 
COMPRESS_U16(U16BIG(((uint16_t*)(a))[0]), t1_3, t1_2, t1_1, t1_0); \ + var_0 |= t1_0 << 12; \ + var_1 |= t1_1 << 12; \ + var_2 |= t1_2 << 12; \ + var_3 |= t1_3 << 12; \ } while (0) #define EXPAND_SHORT_16(x) \ @@ -76,28 +70,28 @@ typedef unsigned long long u64; *var = U16BIG(t0_0 | (t0_1 << 1) | (t0_2 << 2) | (t0_3 << 3)); \ } while (0) -#define EXPAND_BYTE_ARRAY_16(a, var_3, var_2, var_1, var_0) \ - do { \ - EXPAND_U16((((u16 *)(a)) + 3), var_3, var_2, var_1, var_0); \ - t1_3 = var_3 >> 4; \ - t1_2 = var_2 >> 4; \ - t1_1 = var_1 >> 4; \ - t1_0 = var_0 >> 4; \ - EXPAND_U16((((u16 *)(a)) + 2), t1_3, t1_2, t1_1, t1_0); \ - t1_3 >>= 4; \ - t1_2 >>= 4; \ - t1_1 >>= 4; \ - t1_0 >>= 4; \ - EXPAND_U16((((u16 *)(a)) + 1), t1_3, t1_2, t1_1, t1_0); \ - t1_3 >>= 4; \ - t1_2 >>= 4; \ - t1_1 >>= 4; \ - t1_0 >>= 4; \ - EXPAND_U16((((u16 *)(a)) + 0), t1_3, t1_2, t1_1, t1_0); \ +#define EXPAND_BYTE_ARRAY_16(a, var_3, var_2, var_1, var_0) \ + do { \ + EXPAND_U16((((uint16_t*)(a)) + 3), var_3, var_2, var_1, var_0); \ + t1_3 = var_3 >> 4; \ + t1_2 = var_2 >> 4; \ + t1_1 = var_1 >> 4; \ + t1_0 = var_0 >> 4; \ + EXPAND_U16((((uint16_t*)(a)) + 2), t1_3, t1_2, t1_1, t1_0); \ + t1_3 >>= 4; \ + t1_2 >>= 4; \ + t1_1 >>= 4; \ + t1_0 >>= 4; \ + EXPAND_U16((((uint16_t*)(a)) + 1), t1_3, t1_2, t1_1, t1_0); \ + t1_3 >>= 4; \ + t1_2 >>= 4; \ + t1_1 >>= 4; \ + t1_0 >>= 4; \ + EXPAND_U16((((uint16_t*)(a)) + 0), t1_3, t1_2, t1_1, t1_0); \ } while (0) -// This way of implementing Ascon's S-box was inpired by personal communication -// with Joan Daemen about implementing the 3-bit chi layer. +/* This way of implementing Ascon's S-box was inspired by personal communication + with Joan Daemen about implementing the 3-bit chi layer. 
*/ #define ROUND_16(C_3, C_2, C_1, C_0) \ do { \ /* round constant */ \ @@ -179,62 +173,62 @@ typedef unsigned long long u64; t0_1 = x0_1; \ t0_2 = x0_2; \ t0_3 = x0_3; \ - x0_1 ^= ROTR16(t0_0, 5); \ - x0_2 ^= ROTR16(t0_1, 5); \ - x0_3 ^= ROTR16(t0_2, 5); \ - x0_0 ^= ROTR16(t0_3, 4); \ - x0_0 ^= ROTR16(t0_0, 7); \ - x0_1 ^= ROTR16(t0_1, 7); \ - x0_2 ^= ROTR16(t0_2, 7); \ - x0_3 ^= ROTR16(t0_3, 7); \ + x0_1 ^= ROR16(t0_0, 5); \ + x0_2 ^= ROR16(t0_1, 5); \ + x0_3 ^= ROR16(t0_2, 5); \ + x0_0 ^= ROR16(t0_3, 4); \ + x0_0 ^= ROR16(t0_0, 7); \ + x0_1 ^= ROR16(t0_1, 7); \ + x0_2 ^= ROR16(t0_2, 7); \ + x0_3 ^= ROR16(t0_3, 7); \ t0_0 = x1_0; \ t0_1 = x1_1; \ t0_2 = x1_2; \ t0_3 = x1_3; \ x1_3 ^= t0_0; \ - x1_0 ^= ROTL16(t0_1, 1); \ - x1_1 ^= ROTL16(t0_2, 1); \ - x1_2 ^= ROTL16(t0_3, 1); \ - x1_1 ^= ROTL16(t0_0, 6); \ - x1_2 ^= ROTL16(t0_1, 6); \ - x1_3 ^= ROTL16(t0_2, 6); \ - x1_0 ^= ROTL16(t0_3, 7); \ + x1_0 ^= ROL16(t0_1, 1); \ + x1_1 ^= ROL16(t0_2, 1); \ + x1_2 ^= ROL16(t0_3, 1); \ + x1_1 ^= ROL16(t0_0, 6); \ + x1_2 ^= ROL16(t0_1, 6); \ + x1_3 ^= ROL16(t0_2, 6); \ + x1_0 ^= ROL16(t0_3, 7); \ t0_0 = x2_0; \ t0_1 = x2_1; \ t0_2 = x2_2; \ t0_3 = x2_3; \ - x2_3 ^= ROTR16(t0_0, 1); \ + x2_3 ^= ROR16(t0_0, 1); \ x2_0 ^= t0_1; \ x2_1 ^= t0_2; \ x2_2 ^= t0_3; \ - x2_2 ^= ROTR16(t0_0, 2); \ - x2_3 ^= ROTR16(t0_1, 2); \ - x2_0 ^= ROTR16(t0_2, 1); \ - x2_1 ^= ROTR16(t0_3, 1); \ + x2_2 ^= ROR16(t0_0, 2); \ + x2_3 ^= ROR16(t0_1, 2); \ + x2_0 ^= ROR16(t0_2, 1); \ + x2_1 ^= ROR16(t0_3, 1); \ t0_0 = x3_0; \ t0_1 = x3_1; \ t0_2 = x3_2; \ t0_3 = x3_3; \ - x3_2 ^= ROTR16(t0_0, 3); \ - x3_3 ^= ROTR16(t0_1, 3); \ - x3_0 ^= ROTR16(t0_2, 2); \ - x3_1 ^= ROTR16(t0_3, 2); \ - x3_3 ^= ROTR16(t0_0, 5); \ - x3_0 ^= ROTR16(t0_1, 4); \ - x3_1 ^= ROTR16(t0_2, 4); \ - x3_2 ^= ROTR16(t0_3, 4); \ + x3_2 ^= ROR16(t0_0, 3); \ + x3_3 ^= ROR16(t0_1, 3); \ + x3_0 ^= ROR16(t0_2, 2); \ + x3_1 ^= ROR16(t0_3, 2); \ + x3_3 ^= ROR16(t0_0, 5); \ + x3_0 ^= ROR16(t0_1, 4); \ + x3_1 ^= ROR16(t0_2, 4); \ + x3_2 ^= 
ROR16(t0_3, 4); \ t0_0 = x4_0; \ t0_1 = x4_1; \ t0_2 = x4_2; \ t0_3 = x4_3; \ - x4_1 ^= ROTR16(t0_0, 2); \ - x4_2 ^= ROTR16(t0_1, 2); \ - x4_3 ^= ROTR16(t0_2, 2); \ - x4_0 ^= ROTR16(t0_3, 1); \ - x4_3 ^= ROTL16(t0_0, 5); \ - x4_0 ^= ROTL16(t0_1, 6); \ - x4_1 ^= ROTL16(t0_2, 6); \ - x4_2 ^= ROTL16(t0_3, 6); \ + x4_1 ^= ROR16(t0_0, 2); \ + x4_2 ^= ROR16(t0_1, 2); \ + x4_3 ^= ROR16(t0_2, 2); \ + x4_0 ^= ROR16(t0_3, 1); \ + x4_3 ^= ROL16(t0_0, 5); \ + x4_0 ^= ROL16(t0_1, 6); \ + x4_1 ^= ROL16(t0_2, 6); \ + x4_2 ^= ROL16(t0_3, 6); \ } while (0) #define P12_16 \ @@ -265,45 +259,44 @@ typedef unsigned long long u64; ROUND_16(1, 2, 1, 1); \ } while (0) -int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, const unsigned char *npub, - const unsigned char *k) { - u64 rlen; - u64 i; - - u8 buffer[16]; - - u16 K0_0; - u16 K1_0; - u16 N0_0; - u16 N1_0; - u16 x0_0, x1_0, x2_0, x3_0, x4_0; - u16 t0_0, t1_0; - - u16 K0_1; - u16 K1_1; - u16 N0_1; - u16 N1_1; - u16 x0_1, x1_1, x2_1, x3_1, x4_1; - u16 t0_1, t1_1; - - u16 K0_2; - u16 K1_2; - u16 N0_2; - u16 N1_2; - u16 x0_2, x1_2, x2_2, x3_2, x4_2; - u16 t0_2, t1_2; - - u16 K0_3; - u16 K1_3; - u16 N0_3; - u16 N1_3; - u16 x0_3, x1_3, x2_3, x3_3, x4_3; - u16 t0_3, t1_3; - - u16 in_0, in_1, in_2, in_3; +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + uint64_t rlen; + uint64_t i; + + uint8_t buffer[16]; + + uint16_t K0_0; + uint16_t K1_0; + uint16_t N0_0; + uint16_t N1_0; + uint16_t x0_0, x1_0, x2_0, x3_0, x4_0; + uint16_t t0_0, t1_0; + + uint16_t K0_1; + uint16_t K1_1; + uint16_t N0_1; + uint16_t N1_1; + uint16_t x0_1, x1_1, x2_1, x3_1, x4_1; + uint16_t t0_1, t1_1; + + uint16_t K0_2; + uint16_t K1_2; + uint16_t N0_2; + uint16_t N1_2; + uint16_t x0_2, 
x1_2, x2_2, x3_2, x4_2; + uint16_t t0_2, t1_2; + + uint16_t K0_3; + uint16_t K1_3; + uint16_t N0_3; + uint16_t N1_3; + uint16_t x0_3, x1_3, x2_3, x3_3, x4_3; + uint16_t t0_3, t1_3; + + uint16_t in_0, in_1, in_2, in_3; (void)nsec; @@ -312,8 +305,8 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, COMPRESS_BYTE_ARRAY_16(npub, N0_3, N0_2, N0_1, N0_0); COMPRESS_BYTE_ARRAY_16(npub + 8, N1_3, N1_2, N1_1, N1_0); - // initialization - t1_0 = (u16)((CRYPTO_KEYBYTES * 8) << 8 | (RATE * 8) << 0); + /* initialization */ + t1_0 = (uint16_t)((CRYPTO_KEYBYTES * 8) << 8 | (ASCON_RATE * 8) << 0); t1_1 = t1_0 >> 1; t1_2 = t1_1 >> 1; t1_3 = t1_2 >> 1; @@ -325,7 +318,7 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, x0_1 = t1_1 << 12; x0_2 = t1_2 << 12; x0_3 = t1_3 << 12; - t1_0 = (u16)(PA_ROUNDS << 8 | PB_ROUNDS << 0); + t1_0 = (uint16_t)(PA_ROUNDS << 8 | PB_ROUNDS << 0); t1_1 = t1_0 >> 1; t1_2 = t1_1 >> 1; t1_3 = t1_2 >> 1; @@ -362,10 +355,10 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, x4_1 ^= K1_1; x4_2 ^= K1_2; x4_3 ^= K1_3; - // process associated data + /* process associated data */ if (adlen) { rlen = adlen; - while (rlen >= RATE) { + while (rlen >= ASCON_RATE) { COMPRESS_BYTE_ARRAY_16(ad, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; @@ -377,8 +370,8 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, x1_2 ^= in_2; x1_3 ^= in_3; P8_16; - rlen -= RATE; - ad += RATE; + rlen -= ASCON_RATE; + ad += ASCON_RATE; } for (i = 0; i < rlen; ++i, ++ad) buffer[i] = *ad; buffer[rlen] = 0x80; @@ -397,9 +390,9 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, } x4_0 ^= 1; - // process plaintext + /* process plaintext */ rlen = mlen; - while (rlen >= RATE) { + while (rlen >= ASCON_RATE) { COMPRESS_BYTE_ARRAY_16(m, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; @@ -413,9 +406,9 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, EXPAND_BYTE_ARRAY_16(c, 
x0_3, x0_2, x0_1, x0_0); EXPAND_BYTE_ARRAY_16(c + 8, x1_3, x1_2, x1_1, x1_0); P8_16; - rlen -= RATE; - m += RATE; - c += RATE; + rlen -= ASCON_RATE; + m += ASCON_RATE; + c += ASCON_RATE; } for (i = 0; i < rlen; ++i, ++m) buffer[i] = *m; buffer[rlen] = 0x80; @@ -434,7 +427,7 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, EXPAND_BYTE_ARRAY_16(buffer + 8, x1_3, x1_2, x1_1, x1_0); for (i = 0; i < rlen; ++i, ++c) *c = buffer[i]; - // finalization + /* finalization */ x2_0 ^= K0_0; x2_1 ^= K0_1; x2_2 ^= K0_2; @@ -453,7 +446,7 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, x4_2 ^= K1_2; x4_3 ^= K1_3; - // return tag + /* return tag */ EXPAND_BYTE_ARRAY_16(c, x3_3, x3_2, x3_1, x3_0); c += 8; EXPAND_BYTE_ARRAY_16(c, x4_3, x4_2, x4_1, x4_0); @@ -462,49 +455,47 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, return 0; } -int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, - unsigned char *nsec, const unsigned char *c, - unsigned long long clen, const unsigned char *ad, - unsigned long long adlen, const unsigned char *npub, - const unsigned char *k) { +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { *mlen = 0; if (clen < CRYPTO_ABYTES) return -1; - u16 ret_val; - u64 rlen; - u64 i; - - u8 buffer[16]; - - u16 K0_0; - u16 K1_0; - u16 N0_0; - u16 N1_0; - u16 x0_0, x1_0, x2_0, x3_0, x4_0; - u16 t0_0, t1_0; - - u16 K0_1; - u16 K1_1; - u16 N0_1; - u16 N1_1; - u16 x0_1, x1_1, x2_1, x3_1, x4_1; - u16 t0_1, t1_1; - - u16 K0_2; - u16 K1_2; - u16 N0_2; - u16 N1_2; - u16 x0_2, x1_2, x2_2, x3_2, x4_2; - u16 t0_2, t1_2; - - u16 K0_3; - u16 K1_3; - u16 N0_3; - u16 N1_3; - u16 x0_3, x1_3, x2_3, x3_3, x4_3; - u16 t0_3, t1_3; - - u16 in_0, in_1, in_2, in_3; + uint16_t ret_val; + uint64_t rlen; + uint64_t i; + + uint8_t buffer[16]; + + uint16_t K0_0; + uint16_t K1_0; + uint16_t 
N0_0; + uint16_t N1_0; + uint16_t x0_0, x1_0, x2_0, x3_0, x4_0; + uint16_t t0_0, t1_0; + + uint16_t K0_1; + uint16_t K1_1; + uint16_t N0_1; + uint16_t N1_1; + uint16_t x0_1, x1_1, x2_1, x3_1, x4_1; + uint16_t t0_1, t1_1; + + uint16_t K0_2; + uint16_t K1_2; + uint16_t N0_2; + uint16_t N1_2; + uint16_t x0_2, x1_2, x2_2, x3_2, x4_2; + uint16_t t0_2, t1_2; + + uint16_t K0_3; + uint16_t K1_3; + uint16_t N0_3; + uint16_t N1_3; + uint16_t x0_3, x1_3, x2_3, x3_3, x4_3; + uint16_t t0_3, t1_3; + + uint16_t in_0, in_1, in_2, in_3; (void)nsec; @@ -513,8 +504,8 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, COMPRESS_BYTE_ARRAY_16(npub, N0_3, N0_2, N0_1, N0_0); COMPRESS_BYTE_ARRAY_16(npub + 8, N1_3, N1_2, N1_1, N1_0); - // initialization - t1_0 = (u16)((CRYPTO_KEYBYTES * 8) << 8 | (RATE * 8) << 0); + /* initialization */ + t1_0 = (uint16_t)((CRYPTO_KEYBYTES * 8) << 8 | (ASCON_RATE * 8) << 0); t1_1 = t1_0 >> 1; t1_2 = t1_1 >> 1; t1_3 = t1_2 >> 1; @@ -526,7 +517,7 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, x0_1 = t1_1 << 12; x0_2 = t1_2 << 12; x0_3 = t1_3 << 12; - t1_0 = (u16)(PA_ROUNDS << 8 | PB_ROUNDS << 0); + t1_0 = (uint16_t)(PA_ROUNDS << 8 | PB_ROUNDS << 0); t1_1 = t1_0 >> 1; t1_2 = t1_1 >> 1; t1_3 = t1_2 >> 1; @@ -563,10 +554,10 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, x4_1 ^= K1_1; x4_2 ^= K1_2; x4_3 ^= K1_3; - // process associated data + /* process associated data */ if (adlen) { rlen = adlen; - while (rlen >= RATE) { + while (rlen >= ASCON_RATE) { COMPRESS_BYTE_ARRAY_16(ad, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; @@ -578,8 +569,8 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, x1_2 ^= in_2; x1_3 ^= in_3; P8_16; - rlen -= RATE; - ad += RATE; + rlen -= ASCON_RATE; + ad += ASCON_RATE; } for (i = 0; i < rlen; ++i, ++ad) buffer[i] = *ad; buffer[rlen] = 0x80; @@ -598,31 +589,31 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, } x4_0 ^= 1; 
- // process plaintext + /* process plaintext */ rlen = clen - CRYPTO_KEYBYTES; - while (rlen >= RATE) { + while (rlen >= ASCON_RATE) { EXPAND_U16(&t1_0, x0_3, x0_2, x0_1, x0_0); EXPAND_U16(&t1_1, x0_3 >> 4, x0_2 >> 4, x0_1 >> 4, x0_0 >> 4); EXPAND_U16(&t1_2, x0_3 >> 8, x0_2 >> 8, x0_1 >> 8, x0_0 >> 8); EXPAND_U16(&t1_3, x0_3 >> 12, x0_2 >> 12, x0_1 >> 12, x0_0 >> 12); - ((u16 *)m)[0] = (t1_3) ^ ((u16 *)c)[0]; - ((u16 *)m)[1] = (t1_2) ^ ((u16 *)c)[1]; - ((u16 *)m)[2] = (t1_1) ^ ((u16 *)c)[2]; - ((u16 *)m)[3] = (t1_0) ^ ((u16 *)c)[3]; + ((uint16_t*)m)[0] = (t1_3) ^ ((uint16_t*)c)[0]; + ((uint16_t*)m)[1] = (t1_2) ^ ((uint16_t*)c)[1]; + ((uint16_t*)m)[2] = (t1_1) ^ ((uint16_t*)c)[2]; + ((uint16_t*)m)[3] = (t1_0) ^ ((uint16_t*)c)[3]; EXPAND_U16(&t1_0, x1_3, x1_2, x1_1, x1_0); EXPAND_U16(&t1_1, x1_3 >> 4, x1_2 >> 4, x1_1 >> 4, x1_0 >> 4); EXPAND_U16(&t1_2, x1_3 >> 8, x1_2 >> 8, x1_1 >> 8, x1_0 >> 8); EXPAND_U16(&t1_3, x1_3 >> 12, x1_2 >> 12, x1_1 >> 12, x1_0 >> 12); - ((u16 *)m)[4] = (t1_3) ^ ((u16 *)c)[4]; - ((u16 *)m)[5] = (t1_2) ^ ((u16 *)c)[5]; - ((u16 *)m)[6] = (t1_1) ^ ((u16 *)c)[6]; - ((u16 *)m)[7] = (t1_0) ^ ((u16 *)c)[7]; + ((uint16_t*)m)[4] = (t1_3) ^ ((uint16_t*)c)[4]; + ((uint16_t*)m)[5] = (t1_2) ^ ((uint16_t*)c)[5]; + ((uint16_t*)m)[6] = (t1_1) ^ ((uint16_t*)c)[6]; + ((uint16_t*)m)[7] = (t1_0) ^ ((uint16_t*)c)[7]; COMPRESS_BYTE_ARRAY_16(c, x0_3, x0_2, x0_1, x0_0); COMPRESS_BYTE_ARRAY_16(c + 8, x1_3, x1_2, x1_1, x1_0); P8_16; - rlen -= RATE; - m += RATE; - c += RATE; + rlen -= ASCON_RATE; + m += ASCON_RATE; + c += ASCON_RATE; } EXPAND_BYTE_ARRAY_16(buffer, x0_3, x0_2, x0_1, x0_0); EXPAND_BYTE_ARRAY_16(buffer + 8, x1_3, x1_2, x1_1, x1_0); @@ -635,7 +626,7 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, COMPRESS_BYTE_ARRAY_16(buffer, x0_3, x0_2, x0_1, x0_0); COMPRESS_BYTE_ARRAY_16(buffer + 8, x1_3, x1_2, x1_1, x1_0); - // finalization + /* finalization */ x2_0 ^= K0_0; x2_1 ^= K0_1; x2_2 ^= K0_2; @@ -654,32 +645,31 @@ int 
crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, x4_2 ^= K1_2; x4_3 ^= K1_3; - // return -1 if verification fails + /* return -1 if verification fails */ ret_val = 0; EXPAND_U16(&t1_0, x3_3, x3_2, x3_1, x3_0); EXPAND_U16(&t1_1, x3_3 >> 4, x3_2 >> 4, x3_1 >> 4, x3_0 >> 4); EXPAND_U16(&t1_2, x3_3 >> 8, x3_2 >> 8, x3_1 >> 8, x3_0 >> 8); EXPAND_U16(&t1_3, x3_3 >> 12, x3_2 >> 12, x3_1 >> 12, x3_0 >> 12); - ret_val |= ((u16 *)c)[0] ^ (t1_3); - ret_val |= ((u16 *)c)[1] ^ (t1_2); - ret_val |= ((u16 *)c)[2] ^ (t1_1); - ret_val |= ((u16 *)c)[3] ^ (t1_0); + ret_val |= ((uint16_t*)c)[0] ^ (t1_3); + ret_val |= ((uint16_t*)c)[1] ^ (t1_2); + ret_val |= ((uint16_t*)c)[2] ^ (t1_1); + ret_val |= ((uint16_t*)c)[3] ^ (t1_0); EXPAND_U16(&t1_0, x4_3, x4_2, x4_1, x4_0); EXPAND_U16(&t1_1, x4_3 >> 4, x4_2 >> 4, x4_1 >> 4, x4_0 >> 4); EXPAND_U16(&t1_2, x4_3 >> 8, x4_2 >> 8, x4_1 >> 8, x4_0 >> 8); EXPAND_U16(&t1_3, x4_3 >> 12, x4_2 >> 12, x4_1 >> 12, x4_0 >> 12); - ret_val |= ((u16 *)c)[4] ^ (t1_3); - ret_val |= ((u16 *)c)[5] ^ (t1_2); - ret_val |= ((u16 *)c)[6] ^ (t1_1); - ret_val |= ((u16 *)c)[7] ^ (t1_0); + ret_val |= ((uint16_t*)c)[4] ^ (t1_3); + ret_val |= ((uint16_t*)c)[5] ^ (t1_2); + ret_val |= ((uint16_t*)c)[6] ^ (t1_1); + ret_val |= ((uint16_t*)c)[7] ^ (t1_0); if (ret_val != 0) return -1; - // return plaintext + /* return plaintext */ *mlen = clen - CRYPTO_ABYTES; return 0; } - diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi16/endian.h b/ascon/Implementations/crypto_aead/ascon128av12/bi16/endian.h index ffbdcd9..3944360 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi16/endian.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi16/endian.h @@ -3,29 +3,37 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define 
U16BIG(x) (x) #elif defined(_MSC_VER) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ - (((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ - (((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ - (((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) #define U32BIG(x) \ - ((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ - (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) -#define U16BIG(x) \ - ((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8)) + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ - +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32/api.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32/api.h index a4aa567..951ee9c 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32/api.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32/api.h @@ -3,3 +3,4 @@ #define CRYPTO_NPUBBYTES 16 #define 
CRYPTO_ABYTES 16 #define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 16 diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32/ascon.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32/ascon.c new file mode 100644 index 0000000..9011a77 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32/ascon.c @@ -0,0 +1,250 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + XOR(*K1, LOAD64(k)); + XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t N0, N1; + word_t K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* initialization */ + PINIT(s); + XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + XOR(s->x1, K1); + XOR(s->x2, K2); + XOR(s->x3, N0); + XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) XOR(*px, LOAD(ad, adlen)); + XOR(*px, PAD(adlen)); + PB(s); + } + XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext 
*/ + while (mlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + XOR(*px, cx); + STORE(m, *px, clen); + AND(*px, XMASK(clen)); + XOR(*px, cx); + } + XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} + +#if !ASCON_INLINE_MODE + +#define INIT ascon_init +#define ABSORB 
ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + init(s, npub, k); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { final(s, k); } + +#else + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, k); + /* set tag */ + c += mlen; + STORE64(c, s.x3); + STORE64(c + 8, s.x4); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, k); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + XOR(s.x3, LOAD64(c)); + XOR(s.x4, LOAD64(c + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32/ascon.h new file mode 100644 index 0000000..10a5b6e --- 
/dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32/config.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32/config.h new file mode 100644 index 0000000..b1b5080 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 0 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'M' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32/endian.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32/endian.h index ffbdcd9..3944360 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32/endian.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32/endian.h @@ -3,29 +3,37 @@ #if defined(__BYTE_ORDER__) && 
__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) #elif defined(_MSC_VER) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ - (((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ - (((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ - (((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) #define U32BIG(x) \ - ((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ - (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) -#define U16BIG(x) \ - ((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8)) + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ - +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32/loadstore.h 
b/ascon/Implementations/crypto_aead/ascon128av12/bi32/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const 
uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32/permutations.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32/permutations.c new file mode 100644 index 0000000..1bca2ef --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 
+void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32/permutations.h index 8b13e99..ef338f1 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32/permutations.h @@ -1,128 +1,189 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -#include "endian.h" - -typedef unsigned char u8; -typedef unsigned int u32; -typedef unsigned long long u64; - -typedef struct { - u32 e; - u32 o; -} u32_2; - -#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) -#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) - -// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -#define to_bit_interleaving(out, in) \ - do { \ - u32 hi = (in) >> 32; \ - u32 lo = (u32)(in); \ - u32 r0, r1; \ - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \ - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \ - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \ - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \ - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \ - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \ - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \ - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \ - (out).e = (lo & 0x0000FFFF) | (hi << 16); \ - (out).o = (lo >> 16) | (hi & 0xFFFF0000); \ - } while (0) - -// Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 -#define from_bit_interleaving(out, in) \ - do { \ - u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16); \ - u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000); \ - u32 r0, r1; \ - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \ - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \ - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \ - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \ - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \ - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \ - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \ - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \ - out = (u64)hi << 32 | lo; \ - } while (0) - -#define ROUND(C_e, C_o) \ - do { \ - /* round constant */ \ - x2.e ^= C_e; x2.o ^= C_o; \ - /* s-box layer */ \ - x0.e ^= x4.e; x0.o ^= x4.o; \ - x4.e ^= x3.e; x4.o ^= x3.o; \ - x2.e ^= x1.e; x2.o ^= x1.o; \ - t0.e = x0.e; t0.o = x0.o; \ - t4.e = x4.e; t4.o = x4.o; \ - t3.e = x3.e; t3.o = x3.o; \ - t1.e = x1.e; t1.o = x1.o; \ - t2.e = x2.e; t2.o = x2.o; \ - x0.e = t0.e ^ (~t1.e & t2.e); x0.o = t0.o ^ (~t1.o & t2.o); \ - x2.e = t2.e ^ (~t3.e & t4.e); x2.o = t2.o ^ (~t3.o & t4.o); \ - x4.e = t4.e ^ (~t0.e & t1.e); x4.o = t4.o ^ (~t0.o & t1.o); \ - x1.e = t1.e ^ (~t2.e & t3.e); x1.o = t1.o ^ (~t2.o & t3.o); \ - x3.e = t3.e ^ (~t4.e & t0.e); x3.o = t3.o ^ (~t4.o & t0.o); \ - x1.e ^= x0.e; x1.o ^= x0.o; \ - x3.e ^= x2.e; x3.o ^= x2.o; \ - x0.e ^= x4.e; x0.o ^= x4.o; \ - /* linear layer */ \ - t0.e = x0.e ^ ROTR32(x0.o, 4); t0.o = x0.o ^ ROTR32(x0.e, 5); \ - t1.e = x1.e ^ ROTR32(x1.e, 11); t1.o = x1.o ^ ROTR32(x1.o, 11); \ - t2.e = x2.e ^ ROTR32(x2.o, 2); t2.o = x2.o ^ ROTR32(x2.e, 3); \ - t3.e = x3.e ^ ROTR32(x3.o, 3); t3.o = x3.o ^ ROTR32(x3.e, 4); \ - t4.e = x4.e ^ ROTR32(x4.e, 17); t4.o = x4.o ^ ROTR32(x4.o, 17); \ - x0.e ^= ROTR32(t0.o, 9); x0.o ^= ROTR32(t0.e, 10); \ - x1.e ^= ROTR32(t1.o, 19); x1.o ^= ROTR32(t1.e, 
20); \ - x2.e ^= t2.o; x2.o ^= ROTR32(t2.e, 1); \ - x3.e ^= ROTR32(t3.e, 5); x3.o ^= ROTR32(t3.o, 5); \ - x4.e ^= ROTR32(t4.o, 3); x4.o ^= ROTR32(t4.e, 4); \ - x2.e = ~x2.e; x2.o = ~x2.o; \ - } while(0) - -#define P12() \ - do { \ - ROUND(0xc, 0xc); \ - ROUND(0x9, 0xc); \ - ROUND(0xc, 0x9); \ - ROUND(0x9, 0x9); \ - ROUND(0x6, 0xc); \ - ROUND(0x3, 0xc); \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#define P8() \ - do { \ - ROUND(0x6, 0xc); \ - ROUND(0x3, 0xc); \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#define P6() \ - do { \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#endif // PERMUTATIONS_H_ +#include +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + 
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); 
+} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + 
printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32/round.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32/round.h new file mode 100644 index 0000000..d8ea3b6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32/round.h @@ -0,0 +1,85 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + state_t t; + /* round constant */ + s->x2.e ^= C_e; + s->x2.o ^= C_o; + /* s-box layer */ + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + s->x4.e ^= s->x3.e; + s->x4.o ^= s->x3.o; + s->x2.e ^= s->x1.e; + s->x2.o ^= s->x1.o; + t.x0.e = s->x0.e; + t.x0.o = s->x0.o; + t.x4.e = s->x4.e; + t.x4.o = s->x4.o; + t.x3.e = s->x3.e; + t.x3.o = s->x3.o; + t.x1.e = s->x1.e; + t.x1.o = s->x1.o; + t.x2.e = s->x2.e; + t.x2.o = s->x2.o; + s->x0.e = t.x0.e ^ (~t.x1.e & t.x2.e); + s->x0.o = t.x0.o ^ (~t.x1.o & t.x2.o); + s->x2.e = t.x2.e ^ (~t.x3.e & t.x4.e); + s->x2.o = t.x2.o ^ (~t.x3.o & t.x4.o); + s->x4.e = t.x4.e ^ (~t.x0.e & t.x1.e); + s->x4.o = t.x4.o ^ (~t.x0.o & t.x1.o); + s->x1.e = t.x1.e ^ (~t.x2.e & t.x3.e); + s->x1.o = t.x1.o ^ (~t.x2.o & t.x3.o); + s->x3.e = t.x3.e ^ (~t.x4.e & t.x0.e); + s->x3.o = t.x3.o ^ (~t.x4.o & t.x0.o); + s->x1.e ^= s->x0.e; + s->x1.o ^= s->x0.o; + s->x3.e ^= s->x2.e; + s->x3.o ^= s->x2.o; + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + /* linear layer */ + t.x0.e = s->x0.e ^ ROR32(s->x0.o, 4); + t.x0.o = s->x0.o ^ ROR32(s->x0.e, 5); + t.x1.e = s->x1.e ^ 
ROR32(s->x1.e, 11); + t.x1.o = s->x1.o ^ ROR32(s->x1.o, 11); + t.x2.e = s->x2.e ^ ROR32(s->x2.o, 2); + t.x2.o = s->x2.o ^ ROR32(s->x2.e, 3); + t.x3.e = s->x3.e ^ ROR32(s->x3.o, 3); + t.x3.o = s->x3.o ^ ROR32(s->x3.e, 4); + t.x4.e = s->x4.e ^ ROR32(s->x4.e, 17); + t.x4.o = s->x4.o ^ ROR32(s->x4.o, 17); + s->x0.e ^= ROR32(t.x0.o, 9); + s->x0.o ^= ROR32(t.x0.e, 10); + s->x1.e ^= ROR32(t.x1.o, 19); + s->x1.o ^= ROR32(t.x1.e, 20); + s->x2.e ^= t.x2.o; + s->x2.o ^= ROR32(t.x2.e, 1); + s->x3.e ^= ROR32(t.x3.e, 5); + s->x3.o ^= ROR32(t.x3.o, 5); + s->x4.e ^= ROR32(t.x4.o, 3); + s->x4.o ^= ROR32(t.x4.e, 4); + s->x2.e = ~s->x2.e; + s->x2.o = ~s->x2.o; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32/word.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32/word.h new file mode 100644 index 0000000..8ffcaaa --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32/word.h @@ -0,0 +1,117 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef struct { + uint32_t e; + uint32_t o; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { + return (word_t){.o = x >> 32, .e = x}; +} + +__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } + +__forceinline uint64_t TOBI32(uint64_t in); + +__forceinline uint64_t FROMBI32(uint64_t in); + +__forceinline word_t U64TOWORD(uint64_t x) { + uint64_t w = TOBI32(x); + return (word_t){.o = w >> 32, .e = w}; +} + +__forceinline uint64_t WORDTOU64(word_t w) { + return FROMBI32((uint64_t)w.o << 32 | w.e); +} + +#define XOR(a, b) \ + do { \ + word_t tb = b; \ + (a).e ^= tb.e; \ + (a).o ^= tb.o; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t tb = b; \ + (a).e &= tb.e; \ + (a).o &= tb.o; \ + } while (0) + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? 
ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); +} + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return WORD_T((uint64_t)mask << 32 | mask); +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t TOBI32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t FROMBI32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + return (uint64_t)hi << 32 | lo; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/api.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/api.h new file mode 100644 index 0000000..951ee9c --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 16 diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/ascon.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/ascon.c new file mode 100644 index 0000000..9011a77 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/ascon.c @@ -0,0 +1,250 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + XOR(*K1, LOAD64(k)); + XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t N0, N1; 
+ word_t K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* initialization */ + PINIT(s); + XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + XOR(s->x1, K1); + XOR(s->x2, K2); + XOR(s->x3, N0); + XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) XOR(*px, LOAD(ad, adlen)); + XOR(*px, PAD(adlen)); + PB(s); + } + XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; 
+ if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + XOR(*px, cx); + STORE(m, *px, clen); + AND(*px, XMASK(clen)); + XOR(*px, cx); + } + XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} + +#if !ASCON_INLINE_MODE + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + init(s, npub, k); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { final(s, k); } + +#else + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const 
uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, k); + /* set tag */ + c += mlen; + STORE64(c, s.x3); + STORE64(c + 8, s.x4); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, k); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + XOR(s.x3, LOAD64(c)); + XOR(s.x4, LOAD64(c + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/ascon.h new file mode 100644 index 0000000..d11fa01 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/ascon.h @@ -0,0 +1,20 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; + word_t rx; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/config.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/config.h new file mode 
100644 index 0000000..e1a4d6e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/config.h @@ -0,0 +1,44 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 0 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'B' +#endif + +/* Ascon mask generator { 'S'tdlib, 'X'orshift, 'R'andombytes } */ +#ifndef ASCON_MASK_RNG +#define ASCON_MASK_RNG 'X' +#endif + +/* mask key/data loads */ +#ifndef ASCON_MASK_LOADS +#define ASCON_MASK_LOADS 0 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/endian.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + 
(((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/implementors b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/loadstore.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* 
bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i 
>= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/permutations.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/permutations.c new file mode 100644 index 0000000..1bca2ef --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/permutations.h new file mode 100644 index 0000000..ef338f1 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/permutations.h @@ -0,0 +1,189 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 
+ +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if 
ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); 
+ if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/random.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/random.c new file mode 100644 index 0000000..81d0081 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/random.c @@ -0,0 +1,62 @@ + +#include "random.h" + +#include +#include + +#include "config.h" + +#if ASCON_MASK_RNG == 'R' + +void randinit() { srand(time(0)); } + +uint32_t rand32() { + uint32_t r; + randombytes(&r, 4); + return r; +} + +uint64_t rand64() { + uint64_t r; + randombytes(&r, 8); + return r; +} + +#elif ASCON_MASK_RNG == 'S' + +void randinit() { srand(time(0)); } + +uint32_t rand32() { return ((uint32_t)rand() << 21) ^ rand(); } + +uint64_t rand64() { + return ((uint64_t)rand() << 43) ^ ((uint64_t)rand() << 21) ^ rand(); +} + +#elif ASCON_MASK_RNG == 'X' + +uint32_t xorshift32; +uint64_t xorshift64; + +void randinit() { + srand(time(0)); + xorshift32 = rand(); + xorshift64 = (uint64_t)rand() << 32 | rand(); +} + +uint32_t rand32() { + uint32_t 
x = xorshift32; + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + return xorshift32 = x; +} + +uint64_t rand64() { + uint64_t x = xorshift64; + x ^= x << 13; + x ^= x >> 7; + x ^= x << 17; + return xorshift64 = x; +} + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/random.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/random.h new file mode 100644 index 0000000..89226b6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/random.h @@ -0,0 +1,10 @@ +#ifndef RANDOM_H_ +#define RANDOM_H_ + +#include + +void randinit(); +uint32_t rand32(); +uint64_t rand64(); + +#endif /* RANDOM_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/round.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/round.h new file mode 100644 index 0000000..77acbd3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/round.h @@ -0,0 +1,142 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" +#include "random.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = TOSHARES(0); + *K1 = TOSHARES(0); + *K2 = TOSHARES(0); +} + +__forceinline void PINIT(state_t* s) { + randinit(); + s->x0 = TOSHARES(0); + s->x1 = TOSHARES(0); + s->x2 = TOSHARES(0); + s->x3 = TOSHARES(0); + s->x4 = TOSHARES(0); + s->rx = TOSHARES(0); +} + +#define TOFFOLI(a0, a1, b0, b1, c0, c1) \ + do { \ + (a0) ^= (~(b0)) & (c1); \ + (a0) ^= (~(b0)) & (c0); \ + (a1) ^= (b1) & (c1); \ + (a1) ^= (b1) & (c0); \ + } while (0) + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + state_t t; + /* refresh randomness */ + /* s->rx = TOSHARES(0); */ + /* addition of round constant */ + s->x2.s0.e ^= C_e; + /* substitution layer */ + s->x0.s0.e ^= s->x4.s0.e; + s->x4.s0.e ^= s->x3.s0.e; + s->x2.s0.e ^= s->x1.s0.e; + s->x0.s1.e ^= s->x4.s1.e; + s->x4.s1.e ^= s->x3.s1.e; + s->x2.s1.e ^= s->x1.s1.e; + /* start of shared keccak s-box 
from https://eprint.iacr.org/2019/536 */ + s->rx.s0.e = s->rx.s1.e; + TOFFOLI(s->rx.s0.e, s->rx.s1.e, s->x4.s0.e, s->x4.s1.e, s->x0.s0.e, + s->x0.s1.e); + TOFFOLI(s->x0.s0.e, s->x0.s1.e, s->x1.s0.e, s->x1.s1.e, s->x2.s0.e, + s->x2.s1.e); + TOFFOLI(s->x2.s0.e, s->x2.s1.e, s->x3.s0.e, s->x3.s1.e, s->x4.s0.e, + s->x4.s1.e); + TOFFOLI(s->x4.s0.e, s->x4.s1.e, s->x0.s0.e, s->x0.s1.e, s->x1.s0.e, + s->x1.s1.e); + TOFFOLI(s->x1.s0.e, s->x1.s1.e, s->x2.s0.e, s->x2.s1.e, s->x3.s0.e, + s->x3.s1.e); + s->x3.s1.e ^= s->rx.s1.e; + s->x3.s0.e ^= s->rx.s0.e; + /* end of shared keccak s-box */ + s->x1.s0.e ^= s->x0.s0.e; + s->x0.s0.e ^= s->x4.s0.e; + s->x3.s0.e ^= s->x2.s0.e; + s->x2.s0.e = ~s->x2.s0.e; + s->x1.s1.e ^= s->x0.s1.e; + s->x0.s1.e ^= s->x4.s1.e; + s->x3.s1.e ^= s->x2.s1.e; + /* addition of round constant */ + s->x2.s0.o ^= C_o; + /* substitution layer */ + s->x0.s0.o ^= s->x4.s0.o; + s->x4.s0.o ^= s->x3.s0.o; + s->x2.s0.o ^= s->x1.s0.o; + s->x0.s1.o ^= s->x4.s1.o; + s->x4.s1.o ^= s->x3.s1.o; + s->x2.s1.o ^= s->x1.s1.o; + /* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */ + s->rx.s0.o = s->rx.s1.o; + TOFFOLI(s->rx.s0.o, s->rx.s1.o, s->x4.s0.o, s->x4.s1.o, s->x0.s0.o, + s->x0.s1.o); + TOFFOLI(s->x0.s0.o, s->x0.s1.o, s->x1.s0.o, s->x1.s1.o, s->x2.s0.o, + s->x2.s1.o); + TOFFOLI(s->x2.s0.o, s->x2.s1.o, s->x3.s0.o, s->x3.s1.o, s->x4.s0.o, + s->x4.s1.o); + TOFFOLI(s->x4.s0.o, s->x4.s1.o, s->x0.s0.o, s->x0.s1.o, s->x1.s0.o, + s->x1.s1.o); + TOFFOLI(s->x1.s0.o, s->x1.s1.o, s->x2.s0.o, s->x2.s1.o, s->x3.s0.o, + s->x3.s1.o); + s->x3.s1.o ^= s->rx.s1.o; + s->x3.s0.o ^= s->rx.s0.o; + /* end of shared keccak s-box */ + s->x1.s0.o ^= s->x0.s0.o; + s->x0.s0.o ^= s->x4.s0.o; + s->x3.s0.o ^= s->x2.s0.o; + s->x2.s0.o = ~s->x2.s0.o; + s->x1.s1.o ^= s->x0.s1.o; + s->x0.s1.o ^= s->x4.s1.o; + s->x3.s1.o ^= s->x2.s1.o; + /* linear diffusion layer */ + t.x0.s1.e = s->x0.s1.e ^ ROR32(s->x0.s1.o, 4); + t.x0.s1.o = s->x0.s1.o ^ ROR32(s->x0.s1.e, 5); + t.x1.s1.e = s->x1.s1.e 
^ ROR32(s->x1.s1.e, 11); + t.x1.s1.o = s->x1.s1.o ^ ROR32(s->x1.s1.o, 11); + t.x2.s1.e = s->x2.s1.e ^ ROR32(s->x2.s1.o, 2); + t.x2.s1.o = s->x2.s1.o ^ ROR32(s->x2.s1.e, 3); + t.x3.s1.e = s->x3.s1.e ^ ROR32(s->x3.s1.o, 3); + t.x3.s1.o = s->x3.s1.o ^ ROR32(s->x3.s1.e, 4); + t.x4.s1.e = s->x4.s1.e ^ ROR32(s->x4.s1.e, 17); + t.x4.s1.o = s->x4.s1.o ^ ROR32(s->x4.s1.o, 17); + s->x0.s1.e ^= ROR32(t.x0.s1.o, 9); + s->x0.s1.o ^= ROR32(t.x0.s1.e, 10); + s->x1.s1.e ^= ROR32(t.x1.s1.o, 19); + s->x1.s1.o ^= ROR32(t.x1.s1.e, 20); + s->x2.s1.e ^= t.x2.s1.o; + s->x2.s1.o ^= ROR32(t.x2.s1.e, 1); + s->x3.s1.e ^= ROR32(t.x3.s1.e, 5); + s->x3.s1.o ^= ROR32(t.x3.s1.o, 5); + s->x4.s1.e ^= ROR32(t.x4.s1.o, 3); + s->x4.s1.o ^= ROR32(t.x4.s1.e, 4); + t.x0.s0.e = s->x0.s0.e ^ ROR32(s->x0.s0.o, 4); + t.x0.s0.o = s->x0.s0.o ^ ROR32(s->x0.s0.e, 5); + t.x1.s0.e = s->x1.s0.e ^ ROR32(s->x1.s0.e, 11); + t.x1.s0.o = s->x1.s0.o ^ ROR32(s->x1.s0.o, 11); + t.x2.s0.e = s->x2.s0.e ^ ROR32(s->x2.s0.o, 2); + t.x2.s0.o = s->x2.s0.o ^ ROR32(s->x2.s0.e, 3); + t.x3.s0.e = s->x3.s0.e ^ ROR32(s->x3.s0.o, 3); + t.x3.s0.o = s->x3.s0.o ^ ROR32(s->x3.s0.e, 4); + t.x4.s0.e = s->x4.s0.e ^ ROR32(s->x4.s0.e, 17); + t.x4.s0.o = s->x4.s0.o ^ ROR32(s->x4.s0.o, 17); + s->x0.s0.e ^= ROR32(t.x0.s0.o, 9); + s->x0.s0.o ^= ROR32(t.x0.s0.e, 10); + s->x1.s0.e ^= ROR32(t.x1.s0.o, 19); + s->x1.s0.o ^= ROR32(t.x1.s0.e, 20); + s->x2.s0.e ^= t.x2.s0.o; + s->x2.s0.o ^= ROR32(t.x2.s0.e, 1); + s->x3.s0.e ^= ROR32(t.x3.s0.e, 5); + s->x3.s0.o ^= ROR32(t.x3.s0.o, 5); + s->x4.s0.e ^= ROR32(t.x4.s0.o, 3); + s->x4.s0.o ^= ROR32(t.x4.s0.e, 4); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/word.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/word.c new file mode 100644 index 0000000..b2dc0f3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/word.c @@ -0,0 +1,40 @@ +#include "word.h" + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +share_t TOBI32(share_t in) { + uint32_t r0, r1; + uint32_t lo = in.e; + uint32_t hi = in.o; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + in.e = r0; + in.o = r1; + return in; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +share_t FROMBI32(share_t in) { + uint32_t r0 = in.e; + uint32_t r1 = in.o; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + in.e = lo; + in.o = hi; + return in; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/word.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/word.h new file mode 100644 index 0000000..6635995 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_2shares/word.h @@ -0,0 +1,122 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" +#include "random.h" + +typedef struct { + uint32_t e; + uint32_t o; +} share_t; + +typedef struct { + share_t s0; + 
share_t s1; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { + word_t w; + w.s0.e = (uint32_t)x; + w.s0.o = x >> 32; + w.s1.e = 0; + w.s1.o = 0; + return w; +} + +__forceinline uint64_t UINT64_T(word_t w) { + return (uint64_t)w.s0.o << 32 | w.s0.e; +} + +share_t TOBI32(share_t in); + +share_t FROMBI32(share_t in); + +__forceinline word_t TOSHARES(uint64_t in) { + uint32_t r0 = rand32(); + uint32_t r1 = rand32(); + word_t w; + w.s0.e = (uint32_t)in ^ r0; + w.s0.o = (in >> 32) ^ r1; + w.s1.e = r0; + w.s1.o = r1; + return w; +} + +__forceinline uint64_t FROMSHARES(word_t in) { + return (uint64_t)(in.s0.o ^ in.s1.o) << 32 | (in.s0.e ^ in.s1.e); +} + +__forceinline word_t U64TOWORD(uint64_t x) { +#if ASCON_MASK_LOADS + word_t w = TOSHARES(x); + w.s0 = TOBI32(w.s0); + w.s1 = TOBI32(w.s1); + return w; +#else + word_t w = WORD_T(x); + w.s0 = TOBI32(w.s0); + return w; +#endif +} + +__forceinline uint64_t WORDTOU64(word_t w) { + w.s0 = FROMBI32(w.s0); + w.s1 = FROMBI32(w.s1); + return (uint64_t)FROMSHARES(w); +} + +#define XOR(a, b) \ + do { \ + word_t tb = b; \ + (a).s0.e ^= tb.s0.e; \ + (a).s0.o ^= tb.s0.o; \ + (a).s1.e ^= tb.s1.e; \ + (a).s1.o ^= tb.s1.o; \ + } while (0) +/* a &= b, per share and lane: a.sX.l = a.sX.l & (b.s0.l ^ b.s1.l) */ +#define AND(a, b) \ + do { \ + word_t ta = a; \ + word_t tb = b; \ + (a).s0.e = (ta.s0.e & tb.s0.e) ^ (ta.s0.e & tb.s1.e); \ + (a).s0.o = (ta.s0.o & tb.s0.o) ^ (ta.s0.o & tb.s1.o); \ + (a).s1.e = (ta.s1.e & tb.s0.e) ^ (ta.s1.e & tb.s1.e); \ + (a).s1.o = (ta.s1.o & tb.s0.o) ^ (ta.s1.o & tb.s1.o); \ + } while (0) + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return x >> n | x << (32 - n); +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.s0.e = lo2hi.s0.e << 16 | hi2lo.s0.e >> 16; + r.s0.o = lo2hi.s0.o << 16 | hi2lo.s0.o >> 16; + r.s1.e = lo2hi.s1.e << 16 | hi2lo.s1.e >> 16; + r.s1.o = lo2hi.s1.o << 16 | hi2lo.s1.o >> 16; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) + result |= 
((uint8_t*)&(a.s0))[i] ^ ((uint8_t*)&(a.s1))[i]; + for (int i = 0; i < 8; ++i) + result |= ((uint8_t*)&(b.s0))[i] ^ ((uint8_t*)&(b.s1))[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word (unsigned shift: i == 0 reaches bit 31, which would overflow a signed int) */ +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08u << (28 - 4 * i)) << 32); +} + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return WORD_T((uint64_t)mask << 32 | mask); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/api.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/api.h new file mode 100644 index 0000000..951ee9c --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 16 diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/ascon.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/ascon.c new file mode 100644 index 0000000..9011a77 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/ascon.c @@ -0,0 +1,250 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + XOR(*K1, LOAD64(k)); + XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t N0, N1; + word_t K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* initialization */ + PINIT(s); + XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + XOR(s->x1, K1); + XOR(s->x2, K2); + 
XOR(s->x3, N0); + XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) XOR(*px, LOAD(ad, adlen)); + XOR(*px, PAD(adlen)); + PB(s); + } + XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + 
cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + XOR(*px, cx); + STORE(m, *px, clen); + AND(*px, XMASK(clen)); + XOR(*px, cx); + } + XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} + +#if !ASCON_INLINE_MODE + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + init(s, npub, k); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { final(s, k); } + +#else + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + 
FINAL(&s, k); + /* set tag */ + c += mlen; + STORE64(c, s.x3); + STORE64(c + 8, s.x4); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, k); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + XOR(s.x3, LOAD64(c)); + XOR(s.x4, LOAD64(c + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/ascon.h new file mode 100644 index 0000000..d11fa01 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/ascon.h @@ -0,0 +1,20 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; + word_t rx; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/config.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/config.h new file mode 100644 index 0000000..e1a4d6e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/config.h @@ -0,0 +1,44 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 
+#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 0 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'B' +#endif + +/* Ascon mask generator { 'S'tdlib, 'X'orshift, 'R'andombytes } */ +#ifndef ASCON_MASK_RNG +#define ASCON_MASK_RNG 'X' +#endif + +/* mask key/data loads */ +#ifndef ASCON_MASK_LOADS +#define ASCON_MASK_LOADS 0 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/endian.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + 
((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/implementors b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/loadstore.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* 
bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/permutations.c 
b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/permutations.c new file mode 100644 index 0000000..1bca2ef --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/permutations.h new file mode 100644 index 0000000..ef338f1 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/permutations.h @@ -0,0 +1,189 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + 
((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + 
ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/printstate.h 
b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/random.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/random.c new file mode 100644 index 0000000..81d0081 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/random.c @@ -0,0 +1,62 @@ + +#include "random.h" + +#include +#include + +#include "config.h" + +#if ASCON_MASK_RNG == 'R' + +void randinit() { srand(time(0)); } + +uint32_t rand32() { + uint32_t r; + randombytes(&r, 4); + return r; +} + +uint64_t rand64() { + uint64_t r; + randombytes(&r, 8); + return r; +} + +#elif ASCON_MASK_RNG == 'S' + +void randinit() { srand(time(0)); } + +uint32_t rand32() { return ((uint32_t)rand() << 21) ^ rand(); } + +uint64_t rand64() { + return ((uint64_t)rand() << 43) ^ ((uint64_t)rand() << 21) ^ rand(); +} + +#elif ASCON_MASK_RNG == 'X' + +uint32_t xorshift32; +uint64_t xorshift64; + +void randinit() { + srand(time(0)); + xorshift32 = rand(); + xorshift64 = (uint64_t)rand() << 32 | rand(); +} + +uint32_t rand32() { + uint32_t x = xorshift32; + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + return xorshift32 = x; +} + +uint64_t rand64() { + uint64_t x = xorshift64; + x ^= x << 
13; + x ^= x >> 7; + x ^= x << 17; + return xorshift64 = x; +} + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/random.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/random.h new file mode 100644 index 0000000..89226b6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/random.h @@ -0,0 +1,10 @@ +#ifndef RANDOM_H_ +#define RANDOM_H_ + +#include + +void randinit(); +uint32_t rand32(); +uint64_t rand64(); + +#endif /* RANDOM_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/round.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/round.h new file mode 100644 index 0000000..ddaf2d4 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/round.h @@ -0,0 +1,183 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" +#include "random.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = TOSHARES(0); + *K1 = TOSHARES(0); + *K2 = TOSHARES(0); +} + +__forceinline void PINIT(state_t* s) { + randinit(); + s->x0 = TOSHARES(0); + s->x1 = TOSHARES(0); + s->x2 = TOSHARES(0); + s->x3 = TOSHARES(0); + s->x4 = TOSHARES(0); + s->rx = TOSHARES(0); +} + +#define TOFFOLI(a0, a1, a2, b0, b1, b2, c0, c1, c2) \ + do { \ + (a0) ^= (~(b0)) & (c0); \ + (a0) ^= (b0) & (c2); \ + (a0) ^= (b2) & (c0); \ + (a1) ^= (~(b1)) & (c1); \ + (a1) ^= (b1) & (c0); \ + (a1) ^= (b0) & (c1); \ + (a2) ^= (~(b2)) & (c2); \ + (a2) ^= (b2) & (c1); \ + (a2) ^= (b1) & (c2); \ + } while (0) + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + state_t t; + /* refresh randomness */ + /* s->rx = TOSHARES(0); */ + /* addition of round constant */ + s->x2.s0.e ^= C_e; + /* substitution layer */ + s->x0.s0.e ^= s->x4.s0.e; + s->x4.s0.e ^= s->x3.s0.e; + s->x2.s0.e ^= s->x1.s0.e; + s->x0.s1.e ^= s->x4.s1.e; + s->x4.s1.e ^= s->x3.s1.e; + s->x2.s1.e ^= s->x1.s1.e; + s->x0.s2.e ^= s->x4.s2.e; + s->x4.s2.e ^= 
s->x3.s2.e; + s->x2.s2.e ^= s->x1.s2.e; + /* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */ + s->rx.s2.e = s->rx.s0.e; + s->rx.s0.e ^= s->rx.s1.e; + TOFFOLI(s->rx.s0.e, s->rx.s1.e, s->rx.s2.e, s->x4.s0.e, s->x4.s1.e, + s->x4.s2.e, s->x0.s0.e, s->x0.s1.e, s->x0.s2.e); + TOFFOLI(s->x0.s0.e, s->x0.s1.e, s->x0.s2.e, s->x1.s0.e, s->x1.s1.e, + s->x1.s2.e, s->x2.s0.e, s->x2.s1.e, s->x2.s2.e); + TOFFOLI(s->x2.s0.e, s->x2.s1.e, s->x2.s2.e, s->x3.s0.e, s->x3.s1.e, + s->x3.s2.e, s->x4.s0.e, s->x4.s1.e, s->x4.s2.e); + TOFFOLI(s->x4.s0.e, s->x4.s1.e, s->x4.s2.e, s->x0.s0.e, s->x0.s1.e, + s->x0.s2.e, s->x1.s0.e, s->x1.s1.e, s->x1.s2.e); + TOFFOLI(s->x1.s0.e, s->x1.s1.e, s->x1.s2.e, s->x2.s0.e, s->x2.s1.e, + s->x2.s2.e, s->x3.s0.e, s->x3.s1.e, s->x3.s2.e); + s->x3.s2.e ^= s->rx.s2.e; + s->x3.s1.e ^= s->rx.s1.e; + s->x3.s0.e ^= s->rx.s0.e; + /* end of shared keccak s-box */ + s->x1.s0.e ^= s->x0.s0.e; + s->x0.s0.e ^= s->x4.s0.e; + s->x3.s0.e ^= s->x2.s0.e; + s->x2.s0.e = ~s->x2.s0.e; + s->x1.s1.e ^= s->x0.s1.e; + s->x0.s1.e ^= s->x4.s1.e; + s->x3.s1.e ^= s->x2.s1.e; + s->x1.s2.e ^= s->x0.s2.e; + s->x0.s2.e ^= s->x4.s2.e; + s->x3.s2.e ^= s->x2.s2.e; + /* addition of round constant */ + s->x2.s0.o ^= C_o; + /* substitution layer */ + s->x0.s0.o ^= s->x4.s0.o; + s->x4.s0.o ^= s->x3.s0.o; + s->x2.s0.o ^= s->x1.s0.o; + s->x0.s1.o ^= s->x4.s1.o; + s->x4.s1.o ^= s->x3.s1.o; + s->x2.s1.o ^= s->x1.s1.o; + s->x0.s2.o ^= s->x4.s2.o; + s->x4.s2.o ^= s->x3.s2.o; + s->x2.s2.o ^= s->x1.s2.o; + /* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */ + s->rx.s2.o = s->rx.s0.o; + s->rx.s0.o ^= s->rx.s1.o; + TOFFOLI(s->rx.s0.o, s->rx.s1.o, s->rx.s2.o, s->x4.s0.o, s->x4.s1.o, + s->x4.s2.o, s->x0.s0.o, s->x0.s1.o, s->x0.s2.o); + TOFFOLI(s->x0.s0.o, s->x0.s1.o, s->x0.s2.o, s->x1.s0.o, s->x1.s1.o, + s->x1.s2.o, s->x2.s0.o, s->x2.s1.o, s->x2.s2.o); + TOFFOLI(s->x2.s0.o, s->x2.s1.o, s->x2.s2.o, s->x3.s0.o, s->x3.s1.o, + s->x3.s2.o, s->x4.s0.o, s->x4.s1.o, 
s->x4.s2.o); + TOFFOLI(s->x4.s0.o, s->x4.s1.o, s->x4.s2.o, s->x0.s0.o, s->x0.s1.o, + s->x0.s2.o, s->x1.s0.o, s->x1.s1.o, s->x1.s2.o); + TOFFOLI(s->x1.s0.o, s->x1.s1.o, s->x1.s2.o, s->x2.s0.o, s->x2.s1.o, + s->x2.s2.o, s->x3.s0.o, s->x3.s1.o, s->x3.s2.o); + s->x3.s2.o ^= s->rx.s2.o; + s->x3.s1.o ^= s->rx.s1.o; + s->x3.s0.o ^= s->rx.s0.o; + /* end of shared keccak s-box */ + s->x1.s0.o ^= s->x0.s0.o; + s->x0.s0.o ^= s->x4.s0.o; + s->x3.s0.o ^= s->x2.s0.o; + s->x2.s0.o = ~s->x2.s0.o; + s->x1.s1.o ^= s->x0.s1.o; + s->x0.s1.o ^= s->x4.s1.o; + s->x3.s1.o ^= s->x2.s1.o; + s->x1.s2.o ^= s->x0.s2.o; + s->x0.s2.o ^= s->x4.s2.o; + s->x3.s2.o ^= s->x2.s2.o; + /* linear diffusion layer */ + t.x0.s2.e = s->x0.s2.e ^ ROR32(s->x0.s2.o, 4); + t.x0.s2.o = s->x0.s2.o ^ ROR32(s->x0.s2.e, 5); + t.x1.s2.e = s->x1.s2.e ^ ROR32(s->x1.s2.e, 11); + t.x1.s2.o = s->x1.s2.o ^ ROR32(s->x1.s2.o, 11); + t.x2.s2.e = s->x2.s2.e ^ ROR32(s->x2.s2.o, 2); + t.x2.s2.o = s->x2.s2.o ^ ROR32(s->x2.s2.e, 3); + t.x3.s2.e = s->x3.s2.e ^ ROR32(s->x3.s2.o, 3); + t.x3.s2.o = s->x3.s2.o ^ ROR32(s->x3.s2.e, 4); + t.x4.s2.e = s->x4.s2.e ^ ROR32(s->x4.s2.e, 17); + t.x4.s2.o = s->x4.s2.o ^ ROR32(s->x4.s2.o, 17); + s->x0.s2.e ^= ROR32(t.x0.s2.o, 9); + s->x0.s2.o ^= ROR32(t.x0.s2.e, 10); + s->x1.s2.e ^= ROR32(t.x1.s2.o, 19); + s->x1.s2.o ^= ROR32(t.x1.s2.e, 20); + s->x2.s2.e ^= t.x2.s2.o; + s->x2.s2.o ^= ROR32(t.x2.s2.e, 1); + s->x3.s2.e ^= ROR32(t.x3.s2.e, 5); + s->x3.s2.o ^= ROR32(t.x3.s2.o, 5); + s->x4.s2.e ^= ROR32(t.x4.s2.o, 3); + s->x4.s2.o ^= ROR32(t.x4.s2.e, 4); + t.x0.s1.e = s->x0.s1.e ^ ROR32(s->x0.s1.o, 4); + t.x0.s1.o = s->x0.s1.o ^ ROR32(s->x0.s1.e, 5); + t.x1.s1.e = s->x1.s1.e ^ ROR32(s->x1.s1.e, 11); + t.x1.s1.o = s->x1.s1.o ^ ROR32(s->x1.s1.o, 11); + t.x2.s1.e = s->x2.s1.e ^ ROR32(s->x2.s1.o, 2); + t.x2.s1.o = s->x2.s1.o ^ ROR32(s->x2.s1.e, 3); + t.x3.s1.e = s->x3.s1.e ^ ROR32(s->x3.s1.o, 3); + t.x3.s1.o = s->x3.s1.o ^ ROR32(s->x3.s1.e, 4); + t.x4.s1.e = s->x4.s1.e ^ ROR32(s->x4.s1.e, 17); + t.x4.s1.o = 
s->x4.s1.o ^ ROR32(s->x4.s1.o, 17); + s->x0.s1.e ^= ROR32(t.x0.s1.o, 9); + s->x0.s1.o ^= ROR32(t.x0.s1.e, 10); + s->x1.s1.e ^= ROR32(t.x1.s1.o, 19); + s->x1.s1.o ^= ROR32(t.x1.s1.e, 20); + s->x2.s1.e ^= t.x2.s1.o; + s->x2.s1.o ^= ROR32(t.x2.s1.e, 1); + s->x3.s1.e ^= ROR32(t.x3.s1.e, 5); + s->x3.s1.o ^= ROR32(t.x3.s1.o, 5); + s->x4.s1.e ^= ROR32(t.x4.s1.o, 3); + s->x4.s1.o ^= ROR32(t.x4.s1.e, 4); + t.x0.s0.e = s->x0.s0.e ^ ROR32(s->x0.s0.o, 4); + t.x0.s0.o = s->x0.s0.o ^ ROR32(s->x0.s0.e, 5); + t.x1.s0.e = s->x1.s0.e ^ ROR32(s->x1.s0.e, 11); + t.x1.s0.o = s->x1.s0.o ^ ROR32(s->x1.s0.o, 11); + t.x2.s0.e = s->x2.s0.e ^ ROR32(s->x2.s0.o, 2); + t.x2.s0.o = s->x2.s0.o ^ ROR32(s->x2.s0.e, 3); + t.x3.s0.e = s->x3.s0.e ^ ROR32(s->x3.s0.o, 3); + t.x3.s0.o = s->x3.s0.o ^ ROR32(s->x3.s0.e, 4); + t.x4.s0.e = s->x4.s0.e ^ ROR32(s->x4.s0.e, 17); + t.x4.s0.o = s->x4.s0.o ^ ROR32(s->x4.s0.o, 17); + s->x0.s0.e ^= ROR32(t.x0.s0.o, 9); + s->x0.s0.o ^= ROR32(t.x0.s0.e, 10); + s->x1.s0.e ^= ROR32(t.x1.s0.o, 19); + s->x1.s0.o ^= ROR32(t.x1.s0.e, 20); + s->x2.s0.e ^= t.x2.s0.o; + s->x2.s0.o ^= ROR32(t.x2.s0.e, 1); + s->x3.s0.e ^= ROR32(t.x3.s0.e, 5); + s->x3.s0.o ^= ROR32(t.x3.s0.o, 5); + s->x4.s0.e ^= ROR32(t.x4.s0.o, 3); + s->x4.s0.o ^= ROR32(t.x4.s0.e, 4); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/word.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/word.c new file mode 100644 index 0000000..b2dc0f3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/word.c @@ -0,0 +1,40 @@ +#include "word.h" + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */
+share_t TOBI32(share_t in) { /* in.e/in.o carry the low/high 32-bit halves; returns the bit-interleaved e/o pair */
+  uint32_t r0, r1;
+  uint32_t lo = in.e;
+  uint32_t hi = in.o;
+  r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); /* four masked swap stages on lo, distance 1, 2, 4, 8 */
+  r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
+  r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
+  r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8);
+  r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); /* the same four stages on hi */
+  r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
+  r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
+  r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8);
+  r0 = (lo & 0x0000FFFF) | (hi << 16); /* new e: lower 16 bits of both permuted halves */
+  r1 = (lo >> 16) | (hi & 0xFFFF0000); /* new o: upper 16 bits of both permuted halves */
+  in.e = r0;
+  in.o = r1;
+  return in;
+}
+
+/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
+share_t FROMBI32(share_t in) { /* inverse of TOBI32: regroup the 16-bit halves, then undo the stages in reverse order */
+  uint32_t r0 = in.e;
+  uint32_t r1 = in.o;
+  uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); /* lower 16 bits of e and o rebuild the permuted low word */
+  uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); /* upper 16 bits of e and o rebuild the permuted high word */
+  r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); /* swap stages on lo, distance 8, 4, 2, 1 */
+  r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4);
+  r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2);
+  r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1);
+  r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); /* the same stages on hi */
+  r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4);
+  r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2);
+  r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1);
+  in.e = lo;
+  in.o = hi;
+  return in;
+}
diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/word.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/word.h
new file mode 100644
index 0000000..45c250d
--- /dev/null
+++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_3shares/word.h
@@ -0,0 +1,146 @@
+#ifndef WORD_H_
+#define WORD_H_
+
+#include <stdint.h>
+
+#include "config.h"
+#include "random.h"
+
+typedef struct { /* one 64-bit share stored as two 32-bit words; presumably e/o = even/odd bits once interleaved */
+  uint32_t e;
+  uint32_t o;
+} share_t;
+
+typedef struct {
+  share_t s0;
share_t s1; + share_t s2; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { + word_t w; + w.s0.e = (uint32_t)x; + w.s0.o = x >> 32; + w.s1.e = 0; + w.s1.o = 0; + w.s2.e = 0; + w.s2.o = 0; + return w; +} + +__forceinline uint64_t UINT64_T(word_t w) { + return (uint64_t)w.s0.o << 32 | w.s0.e; +} + +share_t TOBI32(share_t in); + +share_t FROMBI32(share_t in); + +__forceinline word_t TOSHARES(uint64_t in) { + uint32_t r0 = rand32(); + uint32_t r1 = rand32(); + uint32_t r2 = rand32(); + uint32_t r3 = rand32(); + word_t w; + w.s0.e = (uint32_t)in ^ r0 ^ r2; + w.s0.o = (in >> 32) ^ r1 ^ r3; + w.s1.e = r0; + w.s1.o = r1; + w.s2.e = r2; + w.s2.o = r3; + return w; +} + +__forceinline uint64_t FROMSHARES(word_t in) { + return (uint64_t)(in.s0.o ^ in.s1.o ^ in.s2.o) << 32 | + (in.s0.e ^ in.s1.e ^ in.s2.e); +} + +__forceinline word_t U64TOWORD(uint64_t x) { +#if ASCON_MASK_LOADS + word_t w = TOSHARES(x); + w.s0 = TOBI32(w.s0); + w.s1 = TOBI32(w.s1); + w.s2 = TOBI32(w.s2); + return w; +#else + word_t w = WORD_T(x); + w.s0 = TOBI32(w.s0); + return w; +#endif +} + +__forceinline uint64_t WORDTOU64(word_t w) { + w.s0 = FROMBI32(w.s0); + w.s1 = FROMBI32(w.s1); + w.s2 = FROMBI32(w.s2); + return (uint64_t)FROMSHARES(w); +} + +#define XOR(a, b) \ + do { \ + word_t tb = b; \ + (a).s0.e ^= tb.s0.e; \ + (a).s0.o ^= tb.s0.o; \ + (a).s1.e ^= tb.s1.e; \ + (a).s1.o ^= tb.s1.o; \ + (a).s2.e ^= tb.s2.e; \ + (a).s2.o ^= tb.s2.o; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t ta = a; \ + word_t tb = b; \ + (a).s0.e = \ + (ta.s0.e & tb.s0.e) ^ (ta.s0.e & tb.s1.e) ^ (ta.s0.e & tb.s2.e); \ + (a).s0.o = \ + (ta.s0.o & tb.s0.o) ^ (ta.s0.o & tb.s1.o) ^ (ta.s0.o & tb.s2.o); \ + (a).s1.e = \ + (ta.s1.e & tb.s0.e) ^ (ta.s1.e & tb.s1.e) ^ (ta.s1.e & tb.s2.e); \ + (a).s1.o = \ + (ta.s1.o & tb.s0.o) ^ (ta.s1.o & tb.s1.o) ^ (ta.s1.o & tb.s2.o); \ + (a).s2.e = \ + (ta.s2.e & tb.s0.e) ^ (ta.s2.e & tb.s1.e) ^ (ta.s2.e & tb.s2.e); \ + (a).s2.o = \ + (ta.s2.o & tb.s0.o) ^ (ta.s2.o & tb.s1.o) ^ 
(ta.s2.o & tb.s2.o); \
+  } while (0)
+
+__forceinline uint32_t ROR32(uint32_t x, int n) { /* rotate x right by n bits, 0 <= n < 32; the left-shift count is masked so n == 0 no longer shifts by 32 */
+  return x >> n | x << ((32 - n) & 31);
+}
+
+__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { /* for every 32-bit word of every share: (lo2hi << 16) | (hi2lo >> 16) */
+  word_t r;
+  r.s0.e = lo2hi.s0.e << 16 | hi2lo.s0.e >> 16;
+  r.s0.o = lo2hi.s0.o << 16 | hi2lo.s0.o >> 16;
+  r.s1.e = lo2hi.s1.e << 16 | hi2lo.s1.e >> 16;
+  r.s1.o = lo2hi.s1.o << 16 | hi2lo.s1.o >> 16;
+  r.s2.e = lo2hi.s2.e << 16 | hi2lo.s2.e >> 16;
+  r.s2.o = lo2hi.s2.o << 16 | hi2lo.s2.o >> 16;
+  return r;
+}
+
+__forceinline int NOTZERO(word_t a, word_t b) { /* nonzero iff the three shares of a, or of b, do not XOR to all-zero */
+  int result = 0;
+  for (int i = 0; i < 8; ++i) /* byte-wise XOR of the three shares of a, OR-ed into result without branching */
+    result |= ((uint8_t*)&(a.s0))[i] ^ ((uint8_t*)&(a.s1))[i] ^
+              ((uint8_t*)&(a.s2))[i];
+  for (int i = 0; i < 8; ++i) /* same for b */
+    result |= ((uint8_t*)&(b.s0))[i] ^ ((uint8_t*)&(b.s1))[i] ^
+              ((uint8_t*)&(b.s2))[i];
+  return result;
+}
+
+/* set padding byte in 64-bit Ascon word (0 <= i <= 7); the literal is unsigned because 0x08 << 28 (i == 0) overflows a signed int */
+__forceinline word_t PAD(int i) {
+  return WORD_T((uint64_t)(0x08u << (28 - 4 * i)) << 32);
+}
+
+/* byte mask for 64-bit Ascon word (1 <= n <= 8) */
+__forceinline word_t XMASK(int n) {
+  uint32_t mask = 0x0fffffff >> (n * 4 - 4); /* same mask is placed in both 32-bit halves below */
+  return WORD_T((uint64_t)mask << 32 | mask);
+}
+
+#endif /* WORD_H_ */
diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/api.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/api.h
index a4aa567..951ee9c 100644
--- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/api.h
+++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/api.h
@@ -3,3 +3,4 @@
 #define CRYPTO_NPUBBYTES 16
 #define CRYPTO_ABYTES 16
 #define CRYPTO_NOOVERLAP 1
+#define ASCON_RATE 16
diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/ascon.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/ascon.c
new file mode 100644
index 0000000..9011a77
--- /dev/null
+++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/ascon.c
@@ -0,0 +1,250 @@
+#include "ascon.h"
+
+#include "api.h"
+#include "loadstore.h"
+#include "permutations.h"
+#include
"printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + XOR(*K1, LOAD64(k)); + XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t N0, N1; + word_t K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* initialization */ + PINIT(s); + XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + XOR(s->x1, K1); + XOR(s->x2, K2); + XOR(s->x3, N0); + XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) XOR(*px, LOAD(ad, adlen)); + XOR(*px, PAD(adlen)); + PB(s); + } + XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + XOR(*px, 
LOAD(m, mlen)); + STORE(c, *px, mlen); + } + XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + XOR(*px, cx); + STORE(m, *px, clen); + AND(*px, XMASK(clen)); + XOR(*px, cx); + } + XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} + +#if !ASCON_INLINE_MODE + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + init(s, npub, k); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, 
const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { final(s, k); } + +#else + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, k); + /* set tag */ + c += mlen; + STORE64(c, s.x3); + STORE64(c + 8, s.x4); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, k); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + XOR(s.x3, LOAD64(c)); + XOR(s.x4, LOAD64(c + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/ascon.h new file mode 100644 index 0000000..10a5b6e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const 
uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/config.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/config.h new file mode 100644 index 0000000..4242e2e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'H' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/endian.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/endian.h index ffbdcd9..3944360 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/endian.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/endian.h @@ -3,29 +3,37 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) #elif defined(_MSC_VER) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + (defined(__BYTE_ORDER__) && 
__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ - (((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ - (((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ - (((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) #define U32BIG(x) \ - ((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ - (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) -#define U16BIG(x) \ - ((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8)) + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ - +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/loadstore.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> 
(64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif 
+
+#define LOAD64(bytes) LOAD(bytes, 8) /* full-word load via the n == 8 path of LOAD */
+#define STORE64(bytes, w) STORE(bytes, w, 8) /* full-word store via the n == 8 path of STORE */
+
+__forceinline word_t LOAD(const uint8_t* bytes, int n) { /* read n bytes, first byte most significant, and convert with U64TOWORD */
+  uint64_t x = 0;
+  if (n == 8) /* NOTE(review): one 64-bit access through a cast byte pointer; presumably relies on the target allowing unaligned loads - confirm */
+    x = U64BIG(*(uint64_t*)bytes);
+  else /* byte loop: byte k lands at bit 56 - 8k; n == 0 runs no iteration and leaves x == 0 */
+    for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i;
+  return U64TOWORD(x);
+}
+
+__forceinline void STORE(uint8_t* bytes, word_t w, int n) { /* write the n most significant bytes of WORDTOU64(w), most significant first */
+  uint64_t x = WORDTOU64(w);
+  if (n == 8) /* NOTE(review): one 64-bit store through a cast byte pointer; same alignment assumption as LOAD - confirm */
+    *(uint64_t*)bytes = U64BIG(x);
+  else /* byte loop: writes exactly n bytes, nothing when n == 0 */
+    for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i;
+}
+
+#else
+#error "Ascon data access macro not defined correctly"
+#endif
+
+#endif /* LOADSTORE_H_ */
diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/permutations.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/permutations.c
new file mode 100644
index 0000000..1bca2ef
--- /dev/null
+++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/permutations.c
@@ -0,0 +1,35 @@
+#include "permutations.h"
+
+#include "round.h"
+
+#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM /* the table is only needed by the looped round functions */
+
+const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, /* 12 entries, each passed to ROUND as {C_e, C_o} */
+                                {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9},
+                                {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}};
+
+#endif
+
+#if ASCON_INLINE_PERM /* nothing is emitted here in this configuration */
+
+#elif ASCON_SINGLE_PERM
+
+void P(state_t* s, uint8_t rounds) { /* applies table entries START(rounds)..11 in order; the index comes straight from START(rounds) with no range check here */
+  printstate(" permutation input", s);
+  for (int i = START(rounds); i < 12; i++)
+    ROUND(s, constants[i][0], constants[i][1]);
+}
+
+#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */
+
+void P12(state_t* s) { P12ROUNDS(s); } /* out-of-line wrapper around P12ROUNDS */
+
+#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 /* P8 is only compiled for rate 16 */
+void P8(state_t* s) { P8ROUNDS(s); }
+#endif
+
+#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 /* P6 is only compiled for rate 8 */
+void P6(state_t* s) { P6ROUNDS(s); }
+#endif
+
+#endif
diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/permutations.h
index 08b8d75..ef338f1 100644
---
a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/permutations.h @@ -1,157 +1,189 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -#include "endian.h" - -typedef unsigned char u8; -typedef unsigned int u32; -typedef unsigned long long u64; - -typedef struct { - u32 e; - u32 o; -} u32_2; - -#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) - -// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -#define to_bit_interleaving(out, in) \ - do { \ - u32 hi = (in) >> 32; \ - u32 lo = (u32)(in); \ - u32 r0, r1; \ - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \ - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \ - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \ - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \ - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \ - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \ - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \ - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \ - (out).e = (lo & 0x0000FFFF) | (hi << 16); \ - (out).o = (lo >> 16) | (hi & 0xFFFF0000); \ - } while (0) - -// Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 -#define from_bit_interleaving(out, in) \ - do { \ - u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16); \ - u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000); \ - u32 r0, r1; \ - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \ - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \ - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \ - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \ - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \ - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \ - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \ - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \ - out = (u64)hi << 32 | lo; \ - } while (0) - -#define ROUND(C_e, C_o) \ - do { \ - u32 reg0, reg1, reg2, reg3; \ - __asm__ __volatile__ ( \ - "eor %[x2_e], %[x2_e], #" #C_e "\n\t" \ - "eor %[x2_o], %[x2_o], #" #C_o "\n\t" \ - "eor %[x0_e], %[x0_e], %[x4_e]\n\t" \ - "eor %[x0_o], %[x0_o], %[x4_o]\n\t" \ - "eor %[x4_e], %[x4_e], %[x3_e]\n\t" \ - "eor %[x4_o], %[x4_o], %[x3_o]\n\t" \ - "eor %[x2_e], %[x2_e], %[x1_e]\n\t" \ - "eor %[x2_o], %[x2_o], %[x1_o]\n\t" \ - "bic %[reg0], %[x0_e], %[x4_e]\n\t" \ - "bic %[reg1], %[x4_e], %[x3_e]\n\t" \ - "bic %[reg2], %[x2_e], %[x1_e]\n\t" \ - "bic %[reg3], %[x1_e], %[x0_e]\n\t" \ - "eor %[x2_e], %[x2_e], %[reg1]\n\t" \ - "eor %[x0_e], %[x0_e], %[reg2]\n\t" \ - "eor %[x4_e], %[x4_e], %[reg3]\n\t" \ - "bic %[reg3], %[x3_e], %[x2_e]\n\t" \ - "eor %[x3_e], %[x3_e], %[reg0]\n\t" \ - "bic %[reg2], %[x0_o], %[x4_o]\n\t" \ - "bic %[reg0], %[x2_o], %[x1_o]\n\t" \ - "bic %[reg1], %[x4_o], %[x3_o]\n\t" \ - "eor %[x1_e], %[x1_e], %[reg3]\n\t" \ - "eor %[x0_o], %[x0_o], %[reg0]\n\t" \ - "eor %[x2_o], %[x2_o], %[reg1]\n\t" \ - "bic %[reg3], %[x1_o], %[x0_o]\n\t" \ - "bic %[reg0], %[x3_o], %[x2_o]\n\t" \ - "eor %[x3_o], %[x3_o], %[reg2]\n\t" \ - "eor %[x3_o], %[x3_o], %[x2_o]\n\t" \ - "eor %[x4_o], %[x4_o], %[reg3]\n\t" \ - "eor %[x1_o], 
%[x1_o], %[reg0]\n\t" \ - "eor %[x3_e], %[x3_e], %[x2_e]\n\t" \ - "eor %[x1_e], %[x1_e], %[x0_e]\n\t" \ - "eor %[x1_o], %[x1_o], %[x0_o]\n\t" \ - "eor %[x0_e], %[x0_e], %[x4_e]\n\t" \ - "eor %[x0_o], %[x0_o], %[x4_o]\n\t" \ - "mvn %[x2_e], %[x2_e]\n\t" \ - "mvn %[x2_o], %[x2_o]\n\t" \ - "eor %[reg0], %[x0_e], %[x0_o], ror #4\n\t" \ - "eor %[reg1], %[x0_o], %[x0_e], ror #5\n\t" \ - "eor %[reg2], %[x1_e], %[x1_e], ror #11\n\t" \ - "eor %[reg3], %[x1_o], %[x1_o], ror #11\n\t" \ - "eor %[x0_e], %[x0_e], %[reg1], ror #9\n\t" \ - "eor %[x0_o], %[x0_o], %[reg0], ror #10\n\t" \ - "eor %[x1_e], %[x1_e], %[reg3], ror #19\n\t" \ - "eor %[x1_o], %[x1_o], %[reg2], ror #20\n\t" \ - "eor %[reg0], %[x2_e], %[x2_o], ror #2\n\t" \ - "eor %[reg1], %[x2_o], %[x2_e], ror #3\n\t" \ - "eor %[reg2], %[x3_e], %[x3_o], ror #3\n\t" \ - "eor %[reg3], %[x3_o], %[x3_e], ror #4\n\t" \ - "eor %[x2_e], %[x2_e], %[reg1]\n\t" \ - "eor %[x2_o], %[x2_o], %[reg0], ror #1\n\t" \ - "eor %[x3_e], %[x3_e], %[reg2], ror #5\n\t" \ - "eor %[x3_o], %[x3_o], %[reg3], ror #5\n\t" \ - "eor %[reg0], %[x4_e], %[x4_e], ror #17\n\t" \ - "eor %[reg1], %[x4_o], %[x4_o], ror #17\n\t" \ - "eor %[x4_e], %[x4_e], %[reg1], ror #3\n\t" \ - "eor %[x4_o], %[x4_o], %[reg0], ror #4\n\t" \ - : [x0_e] "+r" (x0.e), [x1_e] "+r" (x1.e), [x2_e] "+r" (x2.e), [x3_e] "+r" (x3.e), [x4_e] "+r" (x4.e), \ - [x0_o] "+r" (x0.o), [x1_o] "+r" (x1.o), [x2_o] "+r" (x2.o), [x3_o] "+r" (x3.o), [x4_o] "+r" (x4.o), \ - [reg0] "=r" (reg0), [reg1] "=r" (reg1), [reg2] "=r" (reg2), [reg3] "=r" (reg3)::); \ - } while (0) - -#define P12() \ - do { \ - ROUND(0xc, 0xc); \ - ROUND(0x9, 0xc); \ - ROUND(0xc, 0x9); \ - ROUND(0x9, 0x9); \ - ROUND(0x6, 0xc); \ - ROUND(0x3, 0xc); \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#define P8() \ - do { \ - ROUND(0x6, 0xc); \ - ROUND(0x3, 0xc); \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 
0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#define P6() \ - do { \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#endif // PERMUTATIONS_H_ +#include +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) 
+#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" 
permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/round.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/round.h new file mode 100644 index 0000000..06da1ca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/round.h @@ -0,0 +1,102 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = 
WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + uint32_t tmp0, tmp1, tmp2, tmp3; + /* clang-format off */ + __asm__ __volatile__( \ + "eor %[x2_e], %[x2_e], %[C_e]\n\t" \ + "eor %[x2_o], %[x2_o], %[C_o]\n\t" \ + "eor %[x0_e], %[x0_e], %[x4_e]\n\t" \ + "eor %[x0_o], %[x0_o], %[x4_o]\n\t" \ + "eor %[x4_e], %[x4_e], %[x3_e]\n\t" \ + "eor %[x4_o], %[x4_o], %[x3_o]\n\t" \ + "eor %[x2_e], %[x2_e], %[x1_e]\n\t" \ + "eor %[x2_o], %[x2_o], %[x1_o]\n\t" \ + "bic %[tmp0], %[x0_e], %[x4_e]\n\t" \ + "bic %[tmp1], %[x4_e], %[x3_e]\n\t" \ + "bic %[tmp2], %[x2_e], %[x1_e]\n\t" \ + "bic %[tmp3], %[x1_e], %[x0_e]\n\t" \ + "eor %[x2_e], %[x2_e], %[tmp1]\n\t" \ + "eor %[x0_e], %[x0_e], %[tmp2]\n\t" \ + "eor %[x4_e], %[x4_e], %[tmp3]\n\t" \ + "bic %[tmp3], %[x3_e], %[x2_e]\n\t" \ + "eor %[x3_e], %[x3_e], %[tmp0]\n\t" \ + "bic %[tmp2], %[x0_o], %[x4_o]\n\t" \ + "bic %[tmp0], %[x2_o], %[x1_o]\n\t" \ + "bic %[tmp1], %[x4_o], %[x3_o]\n\t" \ + "eor %[x1_e], %[x1_e], %[tmp3]\n\t" \ + "eor %[x0_o], %[x0_o], %[tmp0]\n\t" \ + "eor %[x2_o], %[x2_o], %[tmp1]\n\t" \ + "bic %[tmp3], %[x1_o], %[x0_o]\n\t" \ + "bic %[tmp0], %[x3_o], %[x2_o]\n\t" \ + "eor %[x3_o], %[x3_o], %[tmp2]\n\t" \ + "eor %[x3_o], %[x3_o], %[x2_o]\n\t" \ + "eor %[x4_o], %[x4_o], %[tmp3]\n\t" \ + "eor %[x1_o], %[x1_o], %[tmp0]\n\t" \ + "eor %[x3_e], %[x3_e], %[x2_e]\n\t" \ + "eor %[x1_e], %[x1_e], %[x0_e]\n\t" \ + "eor %[x1_o], %[x1_o], %[x0_o]\n\t" \ + "eor %[x0_e], %[x0_e], %[x4_e]\n\t" \ + "eor %[x0_o], %[x0_o], %[x4_o]\n\t" \ + "mvn %[x2_e], %[x2_e]\n\t" \ + "mvn %[x2_o], %[x2_o]\n\t" \ + "eor %[tmp0], %[x0_e], %[x0_o], ror #4\n\t" \ + "eor %[tmp1], %[x0_o], %[x0_e], ror #5\n\t" \ + "eor %[tmp2], %[x1_e], %[x1_e], ror #11\n\t" \ + "eor %[tmp3], %[x1_o], %[x1_o], ror #11\n\t" \ + "eor %[x0_e], %[x0_e], 
%[tmp1], ror #9\n\t" \ + "eor %[x0_o], %[x0_o], %[tmp0], ror #10\n\t" \ + "eor %[x1_e], %[x1_e], %[tmp3], ror #19\n\t" \ + "eor %[x1_o], %[x1_o], %[tmp2], ror #20\n\t" \ + "eor %[tmp0], %[x2_e], %[x2_o], ror #2\n\t" \ + "eor %[tmp1], %[x2_o], %[x2_e], ror #3\n\t" \ + "eor %[tmp2], %[x3_e], %[x3_o], ror #3\n\t" \ + "eor %[tmp3], %[x3_o], %[x3_e], ror #4\n\t" \ + "eor %[x2_e], %[x2_e], %[tmp1]\n\t" \ + "eor %[x2_o], %[x2_o], %[tmp0], ror #1\n\t" \ + "eor %[x3_e], %[x3_e], %[tmp2], ror #5\n\t" \ + "eor %[x3_o], %[x3_o], %[tmp3], ror #5\n\t" \ + "eor %[tmp0], %[x4_e], %[x4_e], ror #17\n\t" \ + "eor %[tmp1], %[x4_o], %[x4_o], ror #17\n\t" \ + "eor %[x4_e], %[x4_e], %[tmp1], ror #3\n\t" \ + "eor %[x4_o], %[x4_o], %[tmp0], ror #4\n\t" \ + : [ x0_e ] "+r"(s->x0.e), \ + [ x1_e ] "+r"(s->x1.e), \ + [ x2_e ] "+r"(s->x2.e), \ + [ x3_e ] "+r"(s->x3.e), \ + [ x4_e ] "+r"(s->x4.e), \ + [ x0_o ] "+r"(s->x0.o), \ + [ x1_o ] "+r"(s->x1.o), \ + [ x2_o ] "+r"(s->x2.o), \ + [ x3_o ] "+r"(s->x3.o), \ + [ x4_o ] "+r"(s->x4.o), \ + [ tmp0 ] "=r"(tmp0), \ + [ tmp1 ] "=r"(tmp1), \ + [ tmp2 ] "=r"(tmp2), \ + [ tmp3 ] "=r"(tmp3) \ + : [ C_e ] "i"(C_e), \ + [ C_o ] "i"(C_o) \ + : ); + /* clang-format on */ + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/word.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/word.h new file mode 100644 index 0000000..8ffcaaa --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_arm/word.h @@ -0,0 +1,117 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef struct { + uint32_t e; + uint32_t o; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { + return (word_t){.o = x >> 32, .e = x}; +} + +__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } + +__forceinline uint64_t TOBI32(uint64_t in); + +__forceinline uint64_t FROMBI32(uint64_t in); + +__forceinline word_t U64TOWORD(uint64_t x) { + uint64_t 
w = TOBI32(x); + return (word_t){.o = w >> 32, .e = w}; +} + +__forceinline uint64_t WORDTOU64(word_t w) { + return FROMBI32((uint64_t)w.o << 32 | w.e); +} + +#define XOR(a, b) \ + do { \ + word_t tb = b; \ + (a).e ^= tb.e; \ + (a).o ^= tb.o; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t tb = b; \ + (a).e &= tb.e; \ + (a).o &= tb.o; \ + } while (0) + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); +} + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return WORD_T((uint64_t)mask << 32 | mask); +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t TOBI32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t FROMBI32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + return (uint64_t)hi << 32 | lo; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/api.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/api.h index a4aa567..951ee9c 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/api.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/api.h @@ -3,3 +3,4 @@ #define CRYPTO_NPUBBYTES 16 #define CRYPTO_ABYTES 16 #define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 16 
diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/ascon.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/ascon.c new file mode 100644 index 0000000..9011a77 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/ascon.c @@ -0,0 +1,250 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + XOR(*K1, LOAD64(k)); + XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t N0, N1; + word_t K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* initialization */ + PINIT(s); + XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + XOR(s->x1, K1); + XOR(s->x2, K2); + XOR(s->x3, N0); + XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) XOR(*px, LOAD(ad, adlen)); + XOR(*px, PAD(adlen)); + PB(s); + } + XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + XOR(s->x0, 
LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + XOR(*px, cx); + STORE(m, *px, clen); + AND(*px, XMASK(clen)); + XOR(*px, cx); + } + XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} + +#if !ASCON_INLINE_MODE + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define 
DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + init(s, npub, k); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { final(s, k); } + +#else + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, k); + /* set tag */ + c += mlen; + STORE64(c, s.x3); + STORE64(c + 8, s.x4); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, k); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + XOR(s.x3, LOAD64(c)); + XOR(s.x4, LOAD64(c + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/ascon.h new file mode 100644 index 0000000..10a5b6e --- /dev/null +++ 
b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/config.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/config.h new file mode 100644 index 0000000..5ccce77 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'M' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/endian.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/endian.h index ffbdcd9..3944360 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/endian.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/endian.h @@ 
-3,29 +3,37 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) #elif defined(_MSC_VER) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ - (((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ - (((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ - (((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) #define U32BIG(x) \ - ((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ - (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) -#define U16BIG(x) \ - ((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8)) + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ - +#endif /* ENDIAN_H_ */ diff --git 
a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/loadstore.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) 
LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/permutations.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/permutations.c new file mode 100644 index 0000000..1bca2ef --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && 
!ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/permutations.h index 10ae468..ef338f1 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/permutations.h @@ -1,134 +1,189 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -#include "endian.h" - -typedef unsigned char u8; -typedef unsigned int u32; -typedef unsigned long long u64; - -typedef struct { - u32 e; - u32 o; -} u32_2; - -#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) -#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) - -// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -#define to_bit_interleaving(out, in) \ - do { \ - u32 hi = (in) >> 32; \ - u32 lo = (u32)(in); \ - u32 r0, r1; \ - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \ - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \ - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \ - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \ - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \ - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \ - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \ - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \ - (out).e = (lo & 0x0000FFFF) | (hi << 16); \ - (out).o = (lo >> 16) | (hi & 0xFFFF0000); \ - } while (0) - -// Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 -#define from_bit_interleaving(out, in) \ - do { \ - u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16); \ - u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000); \ - u32 r0, r1; \ - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \ - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \ - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \ - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \ - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \ - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \ - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \ - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \ - out = (u64)hi << 32 | lo; \ - } while (0) - -#define ROUND(C_e, C_o) \ - do { \ - /* round constant */ \ - x2.e ^= C_e; x2.o ^= C_o; \ - /* s-box layer */ \ - x0.e ^= x4.e; x0.o ^= x4.o; \ - x4.e ^= x3.e; x4.o ^= x3.o; \ - x2.e ^= x1.e; x2.o ^= x1.o; \ - t0.e = x0.e & (~x4.e); t0.o = x0.o & (~x4.o); \ - x0.e ^= x2.e & (~x1.e); x0.o ^= x2.o & (~x1.o); \ - x2.e ^= x4.e & (~x3.e); x2.o ^= x4.o & (~x3.o); \ - x4.e ^= x1.e & (~x0.e); x4.o ^= x1.o & (~x0.o); \ - x1.e ^= x3.e & (~x2.e); x1.o ^= x3.o & (~x2.o); \ - x3.e ^= t0.e; x3.o ^= t0.o; \ - x1.e ^= x0.e; x1.o ^= x0.o; \ - x3.e ^= x2.e; x3.o ^= x2.o; \ - x0.e ^= x4.e; x0.o ^= x4.o; \ - /* linear layer */ \ - t0.e = x0.e ^ ROTR32(x0.o, 4); \ - t0.o = x0.o ^ ROTR32(x0.e, 5); \ - x0.e ^= ROTR32(t0.o, 9); \ - x0.o ^= ROTR32(t0.e, 10); \ - t0.e = x1.e ^ ROTR32(x1.e, 11); \ - t0.o = x1.o ^ ROTR32(x1.o, 11); \ - x1.e ^= ROTR32(t0.o, 19); \ - x1.o ^= ROTR32(t0.e, 20); \ - t0.e = x2.e ^ ROTR32(x2.o, 2); \ - t0.o = x2.o ^ ROTR32(x2.e, 3); \ - x2.e ^= t0.o; \ - x2.o ^= ROTR32(t0.e, 1); \ - t0.e = x3.e ^ ROTR32(x3.o, 3); \ - t0.o = x3.o ^ ROTR32(x3.e, 4); \ - x3.e ^= ROTR32(t0.e, 5); \ - x3.o ^= ROTR32(t0.o, 5); \ - t0.e = x4.e ^ ROTR32(x4.e, 17); \ - t0.o = x4.o ^ ROTR32(x4.o, 17); \ - x4.e ^= ROTR32(t0.o, 3); \ - x4.o ^= 
ROTR32(t0.e, 4); \ - x2.e = ~x2.e; x2.o = ~x2.o; \ - } while(0) - -#define P12() \ - do { \ - ROUND(0xc, 0xc); \ - ROUND(0x9, 0xc); \ - ROUND(0xc, 0x9); \ - ROUND(0x9, 0x9); \ - ROUND(0x6, 0xc); \ - ROUND(0x3, 0xc); \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#define P8() \ - do { \ - ROUND(0x6, 0xc); \ - ROUND(0x3, 0xc); \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#define P6() \ - do { \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#endif // PERMUTATIONS_H_ +#include +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + 
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + 
ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" 
x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/round.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/round.h new file mode 100644 index 0000000..bc7a0cd --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/round.h @@ -0,0 +1,77 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + uint32_t tmp_e, tmp_o; + /* round constant */ + s->x2.e ^= C_e; + s->x2.o ^= C_o; + /* s-box layer */ + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + s->x4.e ^= s->x3.e; + s->x4.o ^= s->x3.o; + s->x2.e ^= s->x1.e; + s->x2.o ^= s->x1.o; + tmp_e = s->x0.e & (~s->x4.e); + tmp_o = s->x0.o & (~s->x4.o); + s->x0.e ^= s->x2.e & (~s->x1.e); + s->x0.o ^= s->x2.o & (~s->x1.o); + s->x2.e ^= s->x4.e & (~s->x3.e); + s->x2.o ^= s->x4.o & (~s->x3.o); + s->x4.e ^= s->x1.e & (~s->x0.e); + s->x4.o ^= s->x1.o & (~s->x0.o); + s->x1.e ^= s->x3.e & (~s->x2.e); + s->x1.o ^= s->x3.o & (~s->x2.o); + s->x3.e ^= tmp_e; + s->x3.o ^= tmp_o; + s->x1.e ^= s->x0.e; + s->x1.o ^= s->x0.o; + s->x3.e ^= s->x2.e; + s->x3.o ^= s->x2.o; + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + /* linear layer */ + tmp_e = s->x0.e ^ ROR32(s->x0.o, 4); + tmp_o = s->x0.o ^ ROR32(s->x0.e, 5); + s->x0.e ^= ROR32(tmp_o, 9); + s->x0.o ^= ROR32(tmp_e, 10); + tmp_e = s->x1.e ^ ROR32(s->x1.e, 11); + tmp_o = s->x1.o ^ ROR32(s->x1.o, 11); + s->x1.e ^= ROR32(tmp_o, 19); + s->x1.o ^= ROR32(tmp_e, 20); + tmp_e = s->x2.e ^ ROR32(s->x2.o, 2); + tmp_o = s->x2.o ^ ROR32(s->x2.e, 3); + s->x2.e ^= tmp_o; + s->x2.o ^= ROR32(tmp_e, 1); + tmp_e = s->x3.e ^ ROR32(s->x3.o, 
3); + tmp_o = s->x3.o ^ ROR32(s->x3.e, 4); + s->x3.e ^= ROR32(tmp_e, 5); + s->x3.o ^= ROR32(tmp_o, 5); + tmp_e = s->x4.e ^ ROR32(s->x4.e, 17); + tmp_o = s->x4.o ^ ROR32(s->x4.o, 17); + s->x4.e ^= ROR32(tmp_o, 3); + s->x4.o ^= ROR32(tmp_e, 4); + s->x2.e = ~s->x2.e; + s->x2.o = ~s->x2.o; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/word.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/word.h new file mode 100644 index 0000000..8ffcaaa --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowreg/word.h @@ -0,0 +1,117 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include <stdint.h> /* uint32_t, uint64_t */ + +#include "config.h" + +typedef struct { + uint32_t e; + uint32_t o; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { + return (word_t){.o = x >> 32, .e = x}; +} + +__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } + +__forceinline uint64_t TOBI32(uint64_t in); + +__forceinline uint64_t FROMBI32(uint64_t in); + +__forceinline word_t U64TOWORD(uint64_t x) { + uint64_t w = TOBI32(x); + return (word_t){.o = w >> 32, .e = w}; +} + +__forceinline uint64_t WORDTOU64(word_t w) { + return FROMBI32((uint64_t)w.o << 32 | w.e); +} + +#define XOR(a, b) \ + do { \ + word_t tb = b; \ + (a).e ^= tb.e; \ + (a).o ^= tb.o; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t tb = b; \ + (a).e &= tb.e; \ + (a).o &= tb.o; \ + } while (0) + +__forceinline uint32_t ROR32(uint32_t x, int n) { /* rotate x right by n bits, 0 <= n <= 31; the left-shift count is masked so n == 0 never shifts by 32 */ + return x >> n | x << ((32 - n) & 31); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word (0 <= i <= 7); the literal is unsigned so that 0x08 << 28 for i == 0 does not overflow int */ +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08u << (28 - 4 * i)) << 32); +} + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return WORD_T((uint64_t)mask << 32 | mask); +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t TOBI32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t FROMBI32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + return (uint64_t)hi << 32 | lo; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/api.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/api.h index a4aa567..951ee9c 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/api.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/api.h @@ -3,3 +3,4 @@ #define CRYPTO_NPUBBYTES 16 #define CRYPTO_ABYTES 16 #define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 16 diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/ascon.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/ascon.c new file mode 100644 index 0000000..5e60f1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/ascon.c @@ -0,0 +1,63 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode) { + word_t N0, N1, K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 
8); + /* load key */ + if (CRYPTO_KEYBYTES == 20) { + K0 = KEYROT(WORD_T(0), LOAD(k, 4)); + k += 4; + } + K1 = LOAD64(k); + K2 = LOAD64(k + 8); + /* initialization */ + s->x0 = IV; + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + s->x1 = K1; + s->x2 = K2; + s->x3 = N0; + s->x4 = N1; + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); + /* process associated data */ + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_AD); + PB(s); + } + XOR(s->x4, WORD_T(1)); + /* process plaintext/ciphertext */ + process_data(s, out, in, tlen, mode); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/ascon.h new file mode 100644 index 0000000..aa685d3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/ascon.h @@ -0,0 +1,24 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include <stdint.h> /* uint8_t, uint64_t */ + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +#define ASCON_AD 0 +#define ASCON_ENC 1 +#define ASCON_DEC 2 + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/config.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/config.h new 
file mode 100644 index 0000000..19426ab --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'B' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/decrypt.c index 7e9dd1a..605bd03 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/decrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/decrypt.c @@ -1,32 +1,32 @@ -#include "core.h" +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" -int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, - unsigned char* nsec, const unsigned char* c, - unsigned long long clen, const unsigned char* ad, - unsigned long long adlen, const unsigned char* npub, - const unsigned char* k) { +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { 
if (clen < CRYPTO_ABYTES) { *mlen = 0; return -1; } - - state s; - u32_2 t0, t1; + state_t s; (void)nsec; - - // set plaintext size + /* set plaintext size */ *mlen = clen - CRYPTO_ABYTES; - + /* ascon decryption */ ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); - - // verify tag (should be constant time, check compiler output) - t0 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen))); - t1 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen + 8))); - if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) != - 0) { + /* verify tag (should be constant time, check compiler output) */ + XOR(s.x3, LOAD64(c + *mlen)); + XOR(s.x4, LOAD64(c + *mlen + 8)); + if (NOTZERO(s.x3, s.x4)) { *mlen = 0; return -1; } - return 0; } diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/encrypt.c index b5dc587..641b266 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/encrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/encrypt.c @@ -1,24 +1,25 @@ -#include "core.h" +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" -int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, - const unsigned char* m, unsigned long long mlen, - const unsigned char* ad, unsigned long long adlen, - const unsigned char* nsec, const unsigned char* npub, - const unsigned char* k) { - state s; - u64 tmp0, tmp1; - (void)nsec; +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); - // set ciphertext size +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + /* set ciphertext size */ *clen = mlen + 
CRYPTO_ABYTES; - + /* ascon encryption */ ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); - - // set tag - tmp0 = from_bit_interleaving(s.x3); - *(u64*)(c + mlen) = U64BIG(tmp0); - tmp1 = from_bit_interleaving(s.x4); - *(u64*)(c + mlen + 8) = U64BIG(tmp1); - + /* set tag */ + STORE64(c + mlen, s.x3); + STORE64(c + mlen + 8, s.x4); return 0; } diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/endian.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/endian.h index b4d18f5..3944360 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/endian.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/endian.h @@ -3,7 +3,10 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) @@ -11,19 +14,26 @@ #elif defined(_MSC_VER) || \ (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \ - (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \ - (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \ - (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56)) -#define U32BIG(x) \ - ((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \ - (((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24)) -#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) 
<< 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/loadstore.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include <stdint.h> /* uint8_t, uint64_t */ + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG 
+#pragma message("Using memcpy to access data") +#endif + +#include <string.h> /* memcpy */ + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.c index bc47f5f..1bca2ef 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.c +++ 
b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.c @@ -1,50 +1,35 @@ #include "permutations.h" -static const u8 constants[][2] = { - {0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, {0x6, 0xc}, {0x3, 0xc}, - {0x6, 0x9}, {0x3, 0x9}, {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; - -// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -u32_2 to_bit_interleaving(u64 in) { - u32 hi = (in) >> 32; - u32 lo = (u32)(in); - u32 r0, r1; - u32_2 out; - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - out.e = (lo & 0x0000FFFF) | (hi << 16); - out.o = (lo >> 16) | (hi & 0xFFFF0000); - return out; -} +#include "round.h" -// Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 -u64 from_bit_interleaving(u32_2 in) { - u32 lo = (in.e & 0x0000FFFF) | (in.o << 16); - u32 hi = (in.e >> 16) | (in.o & 0xFFFF0000); - u32 r0, r1; - u64 out; - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - out = (u64)hi << 32 | lo; - return out; -} +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM -void P(state *p, u8 rounds) { - state s = *p; - u32_2 t0, t1, t2, t3, t4; - u32 i, start = START_ROUND(rounds); - for (i = start; i < 12; i++) ROUND(constants[i][0], constants[i][1]); - *p = s; +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); } + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.h index bc643ce..ef338f1 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.h +++ 
b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/permutations.h @@ -1,71 +1,189 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -typedef unsigned char u8; -typedef unsigned int u32; -typedef unsigned long long u64; - -typedef struct { - u32 e; - u32 o; -} u32_2; - -typedef struct { - u32_2 x0; - u32_2 x1; - u32_2 x2; - u32_2 x3; - u32_2 x4; -} state; - -#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) -#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) -#define START_ROUND(x) (12 - (x)) - -// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -u32_2 to_bit_interleaving(u64 in); - -// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -u64 from_bit_interleaving(u32_2 in); - -/* clang-format off */ -#define ROUND(C_e, C_o) \ - do { \ - /* round constant */ \ - s.x2.e ^= C_e; s.x2.o ^= C_o; \ - /* s-box layer */ \ - s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \ - s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \ - s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \ - t0.e = s.x0.e; t0.o = s.x0.o; \ - t4.e = s.x4.e; t4.o = s.x4.o; \ - t3.e = s.x3.e; t3.o = s.x3.o; \ - t1.e = s.x1.e; t1.o = s.x1.o; \ - t2.e = s.x2.e; t2.o = s.x2.o; \ - s.x0.e = t0.e ^ (~t1.e & t2.e); s.x0.o = t0.o ^ (~t1.o & t2.o); \ - s.x2.e = t2.e ^ (~t3.e & t4.e); s.x2.o = t2.o ^ (~t3.o & t4.o); \ - s.x4.e = t4.e ^ (~t0.e & t1.e); s.x4.o = t4.o ^ (~t0.o & t1.o); \ - s.x1.e = t1.e ^ (~t2.e & t3.e); s.x1.o = t1.o ^ (~t2.o & t3.o); \ - s.x3.e = t3.e ^ (~t4.e & t0.e); s.x3.o = t3.o ^ (~t4.o & t0.o); \ - s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \ - s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \ - s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \ - /* linear layer */ \ - t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \ - t1.e = s.x1.e ^ ROTR32(s.x1.e, 11); t1.o = s.x1.o ^ ROTR32(s.x1.o, 11); \ - t2.e = s.x2.e ^ ROTR32(s.x2.o, 2); t2.o = s.x2.o ^ ROTR32(s.x2.e, 3); \ - t3.e = s.x3.e ^ ROTR32(s.x3.o, 3); t3.o = s.x3.o ^ 
ROTR32(s.x3.e, 4); \ - t4.e = s.x4.e ^ ROTR32(s.x4.e, 17); t4.o = s.x4.o ^ ROTR32(s.x4.o, 17); \ - s.x0.e ^= ROTR32(t0.o, 9); s.x0.o ^= ROTR32(t0.e, 10); \ - s.x1.e ^= ROTR32(t1.o, 19); s.x1.o ^= ROTR32(t1.e, 20); \ - s.x2.e ^= t2.o; s.x2.o ^= ROTR32(t2.e, 1); \ - s.x3.e ^= ROTR32(t3.e, 5); s.x3.o ^= ROTR32(t3.o, 5); \ - s.x4.e ^= ROTR32(t4.o, 3); s.x4.o ^= ROTR32(t4.e, 4); \ - s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \ - } while(0) -/* clang-format on */ - -void P(state *p, u8 rounds); - -#endif // PERMUTATIONS_H_ +#include <stdint.h> /* uint8_t, uint64_t */ + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 
U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - (n)) /* index of the first round constant used by an n-round permutation; argument parenthesized so expressions expand safely */ + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = 
START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include <inttypes.h> /* PRIx64 */ +#include <stdio.h> /* printf */ + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/process.c b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/process.c new file mode 100644 index 
0000000..2ec9d1f --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/process.c @@ -0,0 +1,63 @@ +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode) { + word_t tmp0, tmp1; + uint64_t t0, t1; + uint64_t i; + + while (len >= ASCON_RATE) { + tmp0 = LOAD64(in); + tmp1 = LOAD64(in + 8); + XOR(s->x0, tmp0); + XOR(s->x1, tmp1); + if (mode != ASCON_AD) { + STORE64(out, s->x0); + STORE64(out + 8, s->x1); + } + if (mode == ASCON_DEC) { + s->x0 = tmp0; + s->x1 = tmp1; + } + PB(s); + in += ASCON_RATE; + out += ASCON_RATE; + len -= ASCON_RATE; + } + + if (len) { + tmp1 = WORD_T(0); + if (len >= 8) tmp0 = LOAD64(in); + if (len > 8) + tmp1 = LOAD(in + 8, len - 8); + else + tmp0 = LOAD(in, len); + XOR(s->x0, tmp0); + XOR(s->x1, tmp1); + if (mode != ASCON_AD) { + if (len >= 8) STORE64(out, s->x0); + if (len > 8) + STORE(out + 8, s->x1, len - 8); + else + STORE(out, s->x0, len); + } + if (mode == ASCON_DEC) { + if (len >= 8) s->x0 = tmp0; + if (len > 8) { + AND(s->x1, XMASK(len - 8)); + XOR(s->x1, tmp1); + } else { + AND(s->x0, XMASK(len)); + XOR(s->x0, tmp0); + } + } + } + if (len < 8) + XOR(s->x0, PAD(len % 8)); + else + XOR(s->x1, PAD(len % 8)); +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/round.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/round.h new file mode 100644 index 0000000..d8ea3b6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/round.h @@ -0,0 +1,85 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + 
+__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + state_t t; + /* round constant */ + s->x2.e ^= C_e; + s->x2.o ^= C_o; + /* s-box layer */ + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + s->x4.e ^= s->x3.e; + s->x4.o ^= s->x3.o; + s->x2.e ^= s->x1.e; + s->x2.o ^= s->x1.o; + t.x0.e = s->x0.e; + t.x0.o = s->x0.o; + t.x4.e = s->x4.e; + t.x4.o = s->x4.o; + t.x3.e = s->x3.e; + t.x3.o = s->x3.o; + t.x1.e = s->x1.e; + t.x1.o = s->x1.o; + t.x2.e = s->x2.e; + t.x2.o = s->x2.o; + s->x0.e = t.x0.e ^ (~t.x1.e & t.x2.e); + s->x0.o = t.x0.o ^ (~t.x1.o & t.x2.o); + s->x2.e = t.x2.e ^ (~t.x3.e & t.x4.e); + s->x2.o = t.x2.o ^ (~t.x3.o & t.x4.o); + s->x4.e = t.x4.e ^ (~t.x0.e & t.x1.e); + s->x4.o = t.x4.o ^ (~t.x0.o & t.x1.o); + s->x1.e = t.x1.e ^ (~t.x2.e & t.x3.e); + s->x1.o = t.x1.o ^ (~t.x2.o & t.x3.o); + s->x3.e = t.x3.e ^ (~t.x4.e & t.x0.e); + s->x3.o = t.x3.o ^ (~t.x4.o & t.x0.o); + s->x1.e ^= s->x0.e; + s->x1.o ^= s->x0.o; + s->x3.e ^= s->x2.e; + s->x3.o ^= s->x2.o; + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + /* linear layer */ + t.x0.e = s->x0.e ^ ROR32(s->x0.o, 4); + t.x0.o = s->x0.o ^ ROR32(s->x0.e, 5); + t.x1.e = s->x1.e ^ ROR32(s->x1.e, 11); + t.x1.o = s->x1.o ^ ROR32(s->x1.o, 11); + t.x2.e = s->x2.e ^ ROR32(s->x2.o, 2); + t.x2.o = s->x2.o ^ ROR32(s->x2.e, 3); + t.x3.e = s->x3.e ^ ROR32(s->x3.o, 3); + t.x3.o = s->x3.o ^ ROR32(s->x3.e, 4); + t.x4.e = s->x4.e ^ ROR32(s->x4.e, 17); + t.x4.o = s->x4.o ^ ROR32(s->x4.o, 17); + s->x0.e ^= ROR32(t.x0.o, 9); + s->x0.o ^= ROR32(t.x0.e, 10); + s->x1.e ^= ROR32(t.x1.o, 19); + s->x1.o ^= ROR32(t.x1.e, 20); + s->x2.e ^= t.x2.o; + s->x2.o ^= ROR32(t.x2.e, 1); + s->x3.e ^= ROR32(t.x3.e, 5); + s->x3.o ^= ROR32(t.x3.o, 5); + s->x4.e ^= ROR32(t.x4.o, 3); + s->x4.o ^= ROR32(t.x4.e, 4); + s->x2.e = ~s->x2.e; + s->x2.o = ~s->x2.o; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/word.c 
b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/word.c new file mode 100644 index 0000000..0ac4e63 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/word.c @@ -0,0 +1,36 @@ +#include "word.h" + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t TOBI32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t FROMBI32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + return (uint64_t)hi << 32 | lo; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/word.h b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/word.h new file mode 100644 index 0000000..45184ca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi32_lowsize/word.h @@ -0,0 +1,82 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef struct { + uint32_t e; + uint32_t o; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { + return (word_t){.o = x >> 32, .e = x}; +} + +__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } + +uint64_t TOBI32(uint64_t in); + +uint64_t FROMBI32(uint64_t in); + +__forceinline word_t U64TOWORD(uint64_t x) { + uint64_t w = TOBI32(x); + return (word_t){.o = w >> 32, .e = w}; +} + +__forceinline uint64_t WORDTOU64(word_t w) { + return FROMBI32((uint64_t)w.o << 32 | w.e); +} + +#define XOR(a, b) \ + do { \ + word_t tb = b; \ + (a).e ^= tb.e; \ + (a).o ^= tb.o; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t tb = b; \ + (a).e &= tb.e; \ + (a).o &= tb.o; \ + } while (0) + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? 
ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); +} + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return WORD_T((uint64_t)mask << 32 | mask); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi8/api.h b/ascon/Implementations/crypto_aead/ascon128av12/bi8/api.h index a4aa567..951ee9c 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi8/api.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi8/api.h @@ -3,3 +3,4 @@ #define CRYPTO_NPUBBYTES 16 #define CRYPTO_ABYTES 16 #define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 16 diff --git a/ascon/Implementations/crypto_aead/ascon128av12/bi8/ascon.c b/ascon/Implementations/crypto_aead/ascon128av12/bi8/ascon.c index 4027c4d..133db40 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/bi8/ascon.c +++ b/ascon/Implementations/crypto_aead/ascon128av12/bi8/ascon.c @@ -1,15 +1,11 @@ -#include "api.h" +#include -typedef unsigned char u8; -typedef unsigned short u16; -typedef unsigned int u32; -typedef unsigned long long u64; +#include "api.h" -#define RATE (128 / 8) #define PA_ROUNDS 12 #define PB_ROUNDS 8 -#define ROTR8(x, n) (((x) >> (n)) | ((x) << (8 - (n)))) +#define ROR8(x, n) (((x) >> (n)) | ((x) << (8 - (n)))) #define COMPRESS_BYTE_ARRAY_8(a, var_7, var_6, var_5, var_4, var_3, var_2, \ var_1, var_0) \ @@ 
-269,8 +265,8 @@ typedef unsigned long long u64; a[7] |= t1_7 & 128; \ } while (0) -// This way of implementing Ascon's S-box was inpired by personal communication -// with Joan Daemen about implementing the 3-bit chi layer. +/* This way of implementing Ascon's S-box was inpired by personal communication + with Joan Daemen about implementing the 3-bit chi layer. */ #define ROUND_16(C_7, C_6, C_5, C_4, C_3, C_2, C_1, C_0) \ do { \ /* round constant */ \ @@ -428,22 +424,22 @@ typedef unsigned long long u64; t0_5 = x0_5; \ t0_6 = x0_6; \ t0_7 = x0_7; \ - x0_5 ^= ROTR8(t0_0, 3); \ - x0_6 ^= ROTR8(t0_1, 3); \ - x0_7 ^= ROTR8(t0_2, 3); \ - x0_0 ^= ROTR8(t0_3, 2); \ - x0_1 ^= ROTR8(t0_4, 2); \ - x0_2 ^= ROTR8(t0_5, 2); \ - x0_3 ^= ROTR8(t0_6, 2); \ - x0_4 ^= ROTR8(t0_7, 2); \ - x0_4 ^= ROTR8(t0_0, 4); \ - x0_5 ^= ROTR8(t0_1, 4); \ - x0_6 ^= ROTR8(t0_2, 4); \ - x0_7 ^= ROTR8(t0_3, 4); \ - x0_0 ^= ROTR8(t0_4, 3); \ - x0_1 ^= ROTR8(t0_5, 3); \ - x0_2 ^= ROTR8(t0_6, 3); \ - x0_3 ^= ROTR8(t0_7, 3); \ + x0_5 ^= ROR8(t0_0, 3); \ + x0_6 ^= ROR8(t0_1, 3); \ + x0_7 ^= ROR8(t0_2, 3); \ + x0_0 ^= ROR8(t0_3, 2); \ + x0_1 ^= ROR8(t0_4, 2); \ + x0_2 ^= ROR8(t0_5, 2); \ + x0_3 ^= ROR8(t0_6, 2); \ + x0_4 ^= ROR8(t0_7, 2); \ + x0_4 ^= ROR8(t0_0, 4); \ + x0_5 ^= ROR8(t0_1, 4); \ + x0_6 ^= ROR8(t0_2, 4); \ + x0_7 ^= ROR8(t0_3, 4); \ + x0_0 ^= ROR8(t0_4, 3); \ + x0_1 ^= ROR8(t0_5, 3); \ + x0_2 ^= ROR8(t0_6, 3); \ + x0_3 ^= ROR8(t0_7, 3); \ t0_0 = x1_0; \ t0_1 = x1_1; \ t0_2 = x1_2; \ @@ -457,17 +453,17 @@ typedef unsigned long long u64; x1_5 ^= t0_2; \ x1_6 ^= t0_3; \ x1_7 ^= t0_4; \ - x1_0 ^= ROTR8(t0_5, 7); \ - x1_1 ^= ROTR8(t0_6, 7); \ - x1_2 ^= ROTR8(t0_7, 7); \ - x1_1 ^= ROTR8(t0_0, 5); \ - x1_2 ^= ROTR8(t0_1, 5); \ - x1_3 ^= ROTR8(t0_2, 5); \ - x1_4 ^= ROTR8(t0_3, 5); \ - x1_5 ^= ROTR8(t0_4, 5); \ - x1_6 ^= ROTR8(t0_5, 5); \ - x1_7 ^= ROTR8(t0_6, 5); \ - x1_0 ^= ROTR8(t0_7, 4); \ + x1_0 ^= ROR8(t0_5, 7); \ + x1_1 ^= ROR8(t0_6, 7); \ + x1_2 ^= ROR8(t0_7, 7); \ + x1_1 ^= ROR8(t0_0, 5); \ 
+ x1_2 ^= ROR8(t0_1, 5); \ + x1_3 ^= ROR8(t0_2, 5); \ + x1_4 ^= ROR8(t0_3, 5); \ + x1_5 ^= ROR8(t0_4, 5); \ + x1_6 ^= ROR8(t0_5, 5); \ + x1_7 ^= ROR8(t0_6, 5); \ + x1_0 ^= ROR8(t0_7, 4); \ t0_0 = x2_0; \ t0_1 = x2_1; \ t0_2 = x2_2; \ @@ -476,7 +472,7 @@ typedef unsigned long long u64; t0_5 = x2_5; \ t0_6 = x2_6; \ t0_7 = x2_7; \ - x2_7 ^= ROTR8(t0_0, 1); \ + x2_7 ^= ROR8(t0_0, 1); \ x2_0 ^= t0_1; \ x2_1 ^= t0_2; \ x2_2 ^= t0_3; \ @@ -484,12 +480,12 @@ typedef unsigned long long u64; x2_4 ^= t0_5; \ x2_5 ^= t0_6; \ x2_6 ^= t0_7; \ - x2_2 ^= ROTR8(t0_0, 1); \ - x2_3 ^= ROTR8(t0_1, 1); \ - x2_4 ^= ROTR8(t0_2, 1); \ - x2_5 ^= ROTR8(t0_3, 1); \ - x2_6 ^= ROTR8(t0_4, 1); \ - x2_7 ^= ROTR8(t0_5, 1); \ + x2_2 ^= ROR8(t0_0, 1); \ + x2_3 ^= ROR8(t0_1, 1); \ + x2_4 ^= ROR8(t0_2, 1); \ + x2_5 ^= ROR8(t0_3, 1); \ + x2_6 ^= ROR8(t0_4, 1); \ + x2_7 ^= ROR8(t0_5, 1); \ x2_0 ^= t0_6; \ x2_1 ^= t0_7; \ t0_0 = x3_0; \ @@ -500,22 +496,22 @@ typedef unsigned long long u64; t0_5 = x3_5; \ t0_6 = x3_6; \ t0_7 = x3_7; \ - x3_6 ^= ROTR8(t0_0, 2); \ - x3_7 ^= ROTR8(t0_1, 2); \ - x3_0 ^= ROTR8(t0_2, 1); \ - x3_1 ^= ROTR8(t0_3, 1); \ - x3_2 ^= ROTR8(t0_4, 1); \ - x3_3 ^= ROTR8(t0_5, 1); \ - x3_4 ^= ROTR8(t0_6, 1); \ - x3_5 ^= ROTR8(t0_7, 1); \ - x3_7 ^= ROTR8(t0_0, 3); \ - x3_0 ^= ROTR8(t0_1, 2); \ - x3_1 ^= ROTR8(t0_2, 2); \ - x3_2 ^= ROTR8(t0_3, 2); \ - x3_3 ^= ROTR8(t0_4, 2); \ - x3_4 ^= ROTR8(t0_5, 2); \ - x3_5 ^= ROTR8(t0_6, 2); \ - x3_6 ^= ROTR8(t0_7, 2); \ + x3_6 ^= ROR8(t0_0, 2); \ + x3_7 ^= ROR8(t0_1, 2); \ + x3_0 ^= ROR8(t0_2, 1); \ + x3_1 ^= ROR8(t0_3, 1); \ + x3_2 ^= ROR8(t0_4, 1); \ + x3_3 ^= ROR8(t0_5, 1); \ + x3_4 ^= ROR8(t0_6, 1); \ + x3_5 ^= ROR8(t0_7, 1); \ + x3_7 ^= ROR8(t0_0, 3); \ + x3_0 ^= ROR8(t0_1, 2); \ + x3_1 ^= ROR8(t0_2, 2); \ + x3_2 ^= ROR8(t0_3, 2); \ + x3_3 ^= ROR8(t0_4, 2); \ + x3_4 ^= ROR8(t0_5, 2); \ + x3_5 ^= ROR8(t0_6, 2); \ + x3_6 ^= ROR8(t0_7, 2); \ t0_0 = x4_0; \ t0_1 = x4_1; \ t0_2 = x4_2; \ @@ -524,22 +520,22 @@ typedef unsigned long long u64; t0_5 = 
x4_5; \ t0_6 = x4_6; \ t0_7 = x4_7; \ - x4_1 ^= ROTR8(t0_0, 1); \ - x4_2 ^= ROTR8(t0_1, 1); \ - x4_3 ^= ROTR8(t0_2, 1); \ - x4_4 ^= ROTR8(t0_3, 1); \ - x4_5 ^= ROTR8(t0_4, 1); \ - x4_6 ^= ROTR8(t0_5, 1); \ - x4_7 ^= ROTR8(t0_6, 1); \ + x4_1 ^= ROR8(t0_0, 1); \ + x4_2 ^= ROR8(t0_1, 1); \ + x4_3 ^= ROR8(t0_2, 1); \ + x4_4 ^= ROR8(t0_3, 1); \ + x4_5 ^= ROR8(t0_4, 1); \ + x4_6 ^= ROR8(t0_5, 1); \ + x4_7 ^= ROR8(t0_6, 1); \ x4_0 ^= t0_7; \ - x4_7 ^= ROTR8(t0_0, 6); \ - x4_0 ^= ROTR8(t0_1, 5); \ - x4_1 ^= ROTR8(t0_2, 5); \ - x4_2 ^= ROTR8(t0_3, 5); \ - x4_3 ^= ROTR8(t0_4, 5); \ - x4_4 ^= ROTR8(t0_5, 5); \ - x4_5 ^= ROTR8(t0_6, 5); \ - x4_6 ^= ROTR8(t0_7, 5); \ + x4_7 ^= ROR8(t0_0, 6); \ + x4_0 ^= ROR8(t0_1, 5); \ + x4_1 ^= ROR8(t0_2, 5); \ + x4_2 ^= ROR8(t0_3, 5); \ + x4_3 ^= ROR8(t0_4, 5); \ + x4_4 ^= ROR8(t0_5, 5); \ + x4_5 ^= ROR8(t0_6, 5); \ + x4_6 ^= ROR8(t0_7, 5); \ } while (0) #define P12_8 \ @@ -570,73 +566,72 @@ typedef unsigned long long u64; ROUND_16(0, 1, 0, 0, 1, 0, 1, 1); \ } while (0) -int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, const unsigned char *npub, - const unsigned char *k) { - u64 rlen; - u64 i; +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + uint64_t rlen; + uint64_t i; - u8 buffer[RATE]; + uint8_t buffer[ASCON_RATE]; - u8 K0_0; - u8 K1_0; - u8 N0_0; - u8 N1_0; - u8 x0_0, x1_0, x2_0, x3_0, x4_0; - u8 t0_0, t1_0; + uint8_t K0_0; + uint8_t K1_0; + uint8_t N0_0; + uint8_t N1_0; + uint8_t x0_0, x1_0, x2_0, x3_0, x4_0; + uint8_t t0_0, t1_0; - u8 K0_1; - u8 K1_1; - u8 N0_1; - u8 N1_1; - u8 x0_1, x1_1, x2_1, x3_1, x4_1; - u8 t0_1, t1_1; + uint8_t K0_1; + uint8_t K1_1; + uint8_t N0_1; + uint8_t N1_1; + uint8_t x0_1, x1_1, x2_1, x3_1, x4_1; + uint8_t t0_1, t1_1; 
- u8 K0_2; - u8 K1_2; - u8 N0_2; - u8 N1_2; - u8 x0_2, x1_2, x2_2, x3_2, x4_2; - u8 t0_2, t1_2; + uint8_t K0_2; + uint8_t K1_2; + uint8_t N0_2; + uint8_t N1_2; + uint8_t x0_2, x1_2, x2_2, x3_2, x4_2; + uint8_t t0_2, t1_2; - u8 K0_3; - u8 K1_3; - u8 N0_3; - u8 N1_3; - u8 x0_3, x1_3, x2_3, x3_3, x4_3; - u8 t0_3, t1_3; + uint8_t K0_3; + uint8_t K1_3; + uint8_t N0_3; + uint8_t N1_3; + uint8_t x0_3, x1_3, x2_3, x3_3, x4_3; + uint8_t t0_3, t1_3; - u8 K0_4; - u8 K1_4; - u8 N0_4; - u8 N1_4; - u8 x0_4, x1_4, x2_4, x3_4, x4_4; - u8 t0_4, t1_4; + uint8_t K0_4; + uint8_t K1_4; + uint8_t N0_4; + uint8_t N1_4; + uint8_t x0_4, x1_4, x2_4, x3_4, x4_4; + uint8_t t0_4, t1_4; - u8 K0_5; - u8 K1_5; - u8 N0_5; - u8 N1_5; - u8 x0_5, x1_5, x2_5, x3_5, x4_5; - u8 t0_5, t1_5; + uint8_t K0_5; + uint8_t K1_5; + uint8_t N0_5; + uint8_t N1_5; + uint8_t x0_5, x1_5, x2_5, x3_5, x4_5; + uint8_t t0_5, t1_5; - u8 K0_6; - u8 K1_6; - u8 N0_6; - u8 N1_6; - u8 x0_6, x1_6, x2_6, x3_6, x4_6; - u8 t0_6, t1_6; + uint8_t K0_6; + uint8_t K1_6; + uint8_t N0_6; + uint8_t N1_6; + uint8_t x0_6, x1_6, x2_6, x3_6, x4_6; + uint8_t t0_6, t1_6; - u8 K0_7; - u8 K1_7; - u8 N0_7; - u8 N1_7; - u8 x0_7, x1_7, x2_7, x3_7, x4_7; - u8 t0_7, t1_7; + uint8_t K0_7; + uint8_t K1_7; + uint8_t N0_7; + uint8_t N1_7; + uint8_t x0_7, x1_7, x2_7, x3_7, x4_7; + uint8_t t0_7, t1_7; - u8 in_0, in_1, in_2, in_3, in_4, in_5, in_6, in_7; + uint8_t in_0, in_1, in_2, in_3, in_4, in_5, in_6, in_7; (void)nsec; @@ -647,11 +642,11 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, COMPRESS_BYTE_ARRAY_8((npub + 8), N1_7, N1_6, N1_5, N1_4, N1_3, N1_2, N1_1, N1_0); - // initialization - buffer[0] = (u8)(CRYPTO_KEYBYTES * 8); - buffer[1] = (u8)(RATE * 8); - buffer[2] = (u8)PA_ROUNDS; - buffer[3] = (u8)PB_ROUNDS; + /* initialization */ + buffer[0] = (uint8_t)(CRYPTO_KEYBYTES * 8); + buffer[1] = (uint8_t)(ASCON_RATE * 8); + buffer[2] = (uint8_t)PA_ROUNDS; + buffer[3] = (uint8_t)PB_ROUNDS; buffer[4] = 0; buffer[5] = 0; buffer[6] = 0; 
@@ -707,10 +702,10 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, x4_6 ^= K1_6; x4_7 ^= K1_7; - // process associated data + /* process associated data */ if (adlen) { rlen = adlen; - while (rlen >= RATE) { + while (rlen >= ASCON_RATE) { COMPRESS_BYTE_ARRAY_8(ad, in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; @@ -731,12 +726,12 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, x1_6 ^= in_6; x1_7 ^= in_7; P8_8; - rlen -= RATE; - ad += RATE; + rlen -= ASCON_RATE; + ad += ASCON_RATE; } for (i = 0; i < rlen; ++i, ++ad) buffer[i] = *ad; buffer[rlen] = 0x80; - for (i = rlen + 1; i < RATE; ++i) buffer[i] = 0; + for (i = rlen + 1; i < ASCON_RATE; ++i) buffer[i] = 0; COMPRESS_BYTE_ARRAY_8(buffer, in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0); x0_0 ^= in_0; @@ -761,9 +756,9 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, } x4_0 ^= 1; - // process plaintext + /* process plaintext */ rlen = mlen; - while (rlen >= RATE) { + while (rlen >= ASCON_RATE) { COMPRESS_BYTE_ARRAY_8(m, in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; @@ -787,13 +782,13 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, EXPAND_BYTE_ARRAY_8((c + 8), x1_7, x1_6, x1_5, x1_4, x1_3, x1_2, x1_1, x1_0); P8_8; - rlen -= RATE; - m += RATE; - c += RATE; + rlen -= ASCON_RATE; + m += ASCON_RATE; + c += ASCON_RATE; } for (i = 0; i < rlen; ++i, ++m) buffer[i] = *m; buffer[rlen] = 0x80; - for (i = rlen + 1; i < RATE; ++i) buffer[i] = 0; + for (i = rlen + 1; i < ASCON_RATE; ++i) buffer[i] = 0; COMPRESS_BYTE_ARRAY_8(buffer, in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; @@ -818,7 +813,7 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, x1_0); for (i = 0; i < rlen; ++i, ++c) *c = buffer[i]; - // finalization + /* finalization */ x2_0 ^= K0_0; x2_1 ^= K0_1; x2_2 ^= K0_2; @@ -853,7 +848,7 @@ int crypto_aead_encrypt(unsigned char 
*c, unsigned long long *clen, x4_6 ^= K1_6; x4_7 ^= K1_7; - // return tag + /* return tag */ EXPAND_BYTE_ARRAY_8(c, x3_7, x3_6, x3_5, x3_4, x3_3, x3_2, x3_1, x3_0); c += 8; EXPAND_BYTE_ARRAY_8(c, x4_7, x4_6, x4_5, x4_4, x4_3, x4_2, x4_1, x4_0); @@ -862,77 +857,75 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, return 0; } -int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, - unsigned char *nsec, const unsigned char *c, - unsigned long long clen, const unsigned char *ad, - unsigned long long adlen, const unsigned char *npub, - const unsigned char *k) { +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { *mlen = 0; if (clen < CRYPTO_ABYTES) return -1; - u64 rlen; - u64 i; + uint64_t rlen; + uint64_t i; - u16 ret_val; - u8 buffer[RATE]; + uint16_t ret_val; + uint8_t buffer[ASCON_RATE]; - u8 K0_0; - u8 K1_0; - u8 N0_0; - u8 N1_0; - u8 x0_0, x1_0, x2_0, x3_0, x4_0; - u8 t0_0, t1_0; + uint8_t K0_0; + uint8_t K1_0; + uint8_t N0_0; + uint8_t N1_0; + uint8_t x0_0, x1_0, x2_0, x3_0, x4_0; + uint8_t t0_0, t1_0; - u8 K0_1; - u8 K1_1; - u8 N0_1; - u8 N1_1; - u8 x0_1, x1_1, x2_1, x3_1, x4_1; - u8 t0_1, t1_1; + uint8_t K0_1; + uint8_t K1_1; + uint8_t N0_1; + uint8_t N1_1; + uint8_t x0_1, x1_1, x2_1, x3_1, x4_1; + uint8_t t0_1, t1_1; - u8 K0_2; - u8 K1_2; - u8 N0_2; - u8 N1_2; - u8 x0_2, x1_2, x2_2, x3_2, x4_2; - u8 t0_2, t1_2; + uint8_t K0_2; + uint8_t K1_2; + uint8_t N0_2; + uint8_t N1_2; + uint8_t x0_2, x1_2, x2_2, x3_2, x4_2; + uint8_t t0_2, t1_2; - u8 K0_3; - u8 K1_3; - u8 N0_3; - u8 N1_3; - u8 x0_3, x1_3, x2_3, x3_3, x4_3; - u8 t0_3, t1_3; + uint8_t K0_3; + uint8_t K1_3; + uint8_t N0_3; + uint8_t N1_3; + uint8_t x0_3, x1_3, x2_3, x3_3, x4_3; + uint8_t t0_3, t1_3; - u8 K0_4; - u8 K1_4; - u8 N0_4; - u8 N1_4; - u8 x0_4, x1_4, x2_4, x3_4, x4_4; - u8 t0_4, t1_4; + uint8_t K0_4; + uint8_t K1_4; + uint8_t N0_4; + 
uint8_t N1_4; + uint8_t x0_4, x1_4, x2_4, x3_4, x4_4; + uint8_t t0_4, t1_4; - u8 K0_5; - u8 K1_5; - u8 N0_5; - u8 N1_5; - u8 x0_5, x1_5, x2_5, x3_5, x4_5; - u8 t0_5, t1_5; + uint8_t K0_5; + uint8_t K1_5; + uint8_t N0_5; + uint8_t N1_5; + uint8_t x0_5, x1_5, x2_5, x3_5, x4_5; + uint8_t t0_5, t1_5; - u8 K0_6; - u8 K1_6; - u8 N0_6; - u8 N1_6; - u8 x0_6, x1_6, x2_6, x3_6, x4_6; - u8 t0_6, t1_6; + uint8_t K0_6; + uint8_t K1_6; + uint8_t N0_6; + uint8_t N1_6; + uint8_t x0_6, x1_6, x2_6, x3_6, x4_6; + uint8_t t0_6, t1_6; - u8 K0_7; - u8 K1_7; - u8 N0_7; - u8 N1_7; - u8 x0_7, x1_7, x2_7, x3_7, x4_7; - u8 t0_7, t1_7; + uint8_t K0_7; + uint8_t K1_7; + uint8_t N0_7; + uint8_t N1_7; + uint8_t x0_7, x1_7, x2_7, x3_7, x4_7; + uint8_t t0_7, t1_7; - u8 in_0, in_1, in_2, in_3, in_4, in_5, in_6, in_7; + uint8_t in_0, in_1, in_2, in_3, in_4, in_5, in_6, in_7; (void)nsec; @@ -943,11 +936,11 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, COMPRESS_BYTE_ARRAY_8((npub + 8), N1_7, N1_6, N1_5, N1_4, N1_3, N1_2, N1_1, N1_0); - // initialization - buffer[0] = (u8)(CRYPTO_KEYBYTES * 8); - buffer[1] = (u8)(RATE * 8); - buffer[2] = (u8)PA_ROUNDS; - buffer[3] = (u8)PB_ROUNDS; + /* initialization */ + buffer[0] = (uint8_t)(CRYPTO_KEYBYTES * 8); + buffer[1] = (uint8_t)(ASCON_RATE * 8); + buffer[2] = (uint8_t)PA_ROUNDS; + buffer[3] = (uint8_t)PB_ROUNDS; buffer[4] = 0; buffer[5] = 0; buffer[6] = 0; @@ -1003,10 +996,10 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, x4_6 ^= K1_6; x4_7 ^= K1_7; - // process associated data + /* process associated data */ if (adlen) { rlen = adlen; - while (rlen >= RATE) { + while (rlen >= ASCON_RATE) { COMPRESS_BYTE_ARRAY_8(ad, in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; @@ -1027,12 +1020,12 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, x1_6 ^= in_6; x1_7 ^= in_7; P8_8; - rlen -= RATE; - ad += RATE; + rlen -= ASCON_RATE; + ad += ASCON_RATE; } for (i = 0; i < rlen; 
++i, ++ad) buffer[i] = *ad; buffer[rlen] = 0x80; - for (i = rlen + 1; i < RATE; ++i) buffer[i] = 0; + for (i = rlen + 1; i < ASCON_RATE; ++i) buffer[i] = 0; COMPRESS_BYTE_ARRAY_8(buffer, in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0); x0_0 ^= in_0; @@ -1057,20 +1050,20 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, } x4_0 ^= 1; - // process ciphertext + /* process ciphertext */ rlen = clen - CRYPTO_KEYBYTES; - while (rlen >= RATE) { + while (rlen >= ASCON_RATE) { EXPAND_BYTE_ARRAY_8(m, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0); EXPAND_BYTE_ARRAY_8((m + 8), x1_7, x1_6, x1_5, x1_4, x1_3, x1_2, x1_1, x1_0); - for (i = 0; i < RATE; ++i) m[i] ^= c[i]; + for (i = 0; i < ASCON_RATE; ++i) m[i] ^= c[i]; COMPRESS_BYTE_ARRAY_8(c, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0); COMPRESS_BYTE_ARRAY_8((c + 8), x1_7, x1_6, x1_5, x1_4, x1_3, x1_2, x1_1, x1_0); P8_8; - rlen -= RATE; - m += RATE; - c += RATE; + rlen -= ASCON_RATE; + m += ASCON_RATE; + c += ASCON_RATE; } EXPAND_BYTE_ARRAY_8(buffer, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0); EXPAND_BYTE_ARRAY_8((buffer + 8), x1_7, x1_6, x1_5, x1_4, x1_3, x1_2, x1_1, @@ -1085,7 +1078,7 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, COMPRESS_BYTE_ARRAY_8((buffer + 8), x1_7, x1_6, x1_5, x1_4, x1_3, x1_2, x1_1, x1_0); - // finalization + /* finalization */ x2_0 ^= K0_0; x2_1 ^= K0_1; x2_2 ^= K0_2; @@ -1120,7 +1113,7 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, x4_6 ^= K1_6; x4_7 ^= K1_7; - // return -1 if verification fails + /* return -1 if verification fails */ ret_val = 0; EXPAND_BYTE_ARRAY_8(buffer, x3_7, x3_6, x3_5, x3_4, x3_3, x3_2, x3_1, x3_0); @@ -1132,8 +1125,7 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, if (ret_val != 0) return -1; - // return plaintext + /* return plaintext */ *mlen = clen - CRYPTO_ABYTES; return 0; } - diff --git a/ascon/Implementations/crypto_aead/ascon128av12/neon/api.h 
b/ascon/Implementations/crypto_aead/ascon128av12/neon/api.h new file mode 100644 index 0000000..951ee9c --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/neon/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 16 diff --git a/ascon/Implementations/crypto_aead/ascon128av12/neon/decrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/neon/decrypt.c new file mode 100644 index 0000000..29f1245 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/neon/decrypt.c @@ -0,0 +1,91 @@ +#include "api.h" +#include "endian.h" +#include "permutations.h" + +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define IV \ + ((uint64_t)(8 * (CRYPTO_KEYBYTES)) << 56 | \ + (uint64_t)(8 * (ASCON_RATE)) << 48 | (uint64_t)(PA_ROUNDS) << 40 | \ + (uint64_t)(PB_ROUNDS) << 32) + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + + const uint64_t K0 = U64BIG(*(uint64_t*)k); + const uint64_t K1 = U64BIG(*(uint64_t*)(k + 8)); + const uint64_t N0 = U64BIG(*(uint64_t*)npub); + const uint64_t N1 = U64BIG(*(uint64_t*)(npub + 8)); + state_t s; + uint32_t i; + (void)nsec; + + /* set plaintext size */ + *mlen = clen - CRYPTO_ABYTES; + + /* initialization */ + s.x0 = IV; + s.x1 = K0; + s.x2 = K1; + s.x3 = N0; + s.x4 = N1; + P12(); + s.x3 ^= K0; + s.x4 ^= K1; + + /* process associated data */ + if (adlen) { + AD(); + for (i = 0; i < adlen; ++i, ++ad) + if (i < 8) + s.x0 ^= SETBYTE(*ad, i); + else + s.x1 ^= SETBYTE(*ad, i % 8); + if (adlen < 8) + s.x0 ^= SETBYTE(0x80, adlen); + else + s.x1 ^= SETBYTE(0x80, adlen % 8); + P8(); + } + s.x4 ^= 1; + + /* process plaintext */ + clen -= CRYPTO_ABYTES; + CT(); + for (i = 0; i < clen; ++i, ++m, ++c) { + if (i < 8) { + *m = 
GETBYTE(s.x0, i) ^ *c; + s.x0 &= ~SETBYTE(0xff, i); + s.x0 |= SETBYTE(*c, i); + } else { + *m = GETBYTE(s.x1, i % 8) ^ *c; + s.x1 &= ~SETBYTE(0xff, i % 8); + s.x1 |= SETBYTE(*c, i % 8); + } + } + if (clen < 8) + s.x0 ^= SETBYTE(0x80, clen); + else + s.x1 ^= SETBYTE(0x80, clen % 8); + + /* finalization */ + s.x2 ^= K0; + s.x3 ^= K1; + P12(); + s.x3 ^= K0; + s.x4 ^= K1; + + /* verify tag (should be constant time, check compiler output) */ + if (((s.x3 ^ U64BIG(*(uint64_t*)c)) | (s.x4 ^ U64BIG(*(uint64_t*)(c + 8)))) != + 0) { + *mlen = 0; + return -1; + } + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/neon/encrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/neon/encrypt.c new file mode 100644 index 0000000..308eae1 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/neon/encrypt.c @@ -0,0 +1,81 @@ +#include "api.h" +#include "endian.h" +#include "permutations.h" + +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define IV \ + ((uint64_t)(8 * (CRYPTO_KEYBYTES)) << 56 | \ + (uint64_t)(8 * (ASCON_RATE)) << 48 | (uint64_t)(PA_ROUNDS) << 40 | \ + (uint64_t)(PB_ROUNDS) << 32) + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + const uint64_t K0 = U64BIG(*(uint64_t*)k); + const uint64_t K1 = U64BIG(*(uint64_t*)(k + 8)); + const uint64_t N0 = U64BIG(*(uint64_t*)npub); + const uint64_t N1 = U64BIG(*(uint64_t*)(npub + 8)); + state_t s; + uint32_t i; + (void)nsec; + + /* set ciphertext size */ + *clen = mlen + CRYPTO_ABYTES; + + /* initialization */ + s.x0 = IV; + s.x1 = K0; + s.x2 = K1; + s.x3 = N0; + s.x4 = N1; + P12(); + s.x3 ^= K0; + s.x4 ^= K1; + + /* process associated data */ + if (adlen) { + AD(); + for (i = 0; i < adlen; ++i, ++ad) + if (i < 8) + s.x0 ^= SETBYTE(*ad, i); + else + s.x1 ^= SETBYTE(*ad, i % 8); + if (adlen < 8) + s.x0 ^= SETBYTE(0x80, adlen); + else + s.x1 ^= 
SETBYTE(0x80, adlen % 8); + P8(); + } + s.x4 ^= 1; + + /* process plaintext */ + PT(); + for (i = 0; i < mlen; ++i, ++m, ++c) { + if (i < 8) { + s.x0 ^= SETBYTE(*m, i); + *c = GETBYTE(s.x0, i); + } else { + s.x1 ^= SETBYTE(*m, i % 8); + *c = GETBYTE(s.x1, i % 8); + } + } + if (mlen < 8) + s.x0 ^= SETBYTE(0x80, mlen); + else + s.x1 ^= SETBYTE(0x80, mlen % 8); + + /* finalization */ + s.x2 ^= K0; + s.x3 ^= K1; + P12(); + s.x3 ^= K0; + s.x4 ^= K1; + + /* set tag */ + *(uint64_t*)c = U64BIG(s.x3); + *(uint64_t*)(c + 8) = U64BIG(s.x4); + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/neon/endian.h b/ascon/Implementations/crypto_aead/ascon128av12/neon/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/neon/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in 
endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/neon/implementors b/ascon/Implementations/crypto_aead/ascon128av12/neon/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/neon/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128av12/neon/loadstore.h b/ascon/Implementations/crypto_aead/ascon128av12/neon/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/neon/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + 
uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/neon/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/neon/permutations.h new file mode 100644 index 0000000..91def93 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/neon/permutations.h @@ -0,0 +1,239 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +typedef struct { + uint64_t x0, x1, x2, x3, x4; +} state_t; + +static const uint64_t C[12] = { + 0xffffffffffffff0full, 0xffffffffffffff1eull, 
0xffffffffffffff2dull, + 0xffffffffffffff3cull, 0xffffffffffffff4bull, 0xffffffffffffff5aull, + 0xffffffffffffff69ull, 0xffffffffffffff78ull, 0xffffffffffffff87ull, + 0xffffffffffffff96ull, 0xffffffffffffffa5ull, 0xffffffffffffffb4ull, +}; + +/* clang-format off */ +#define ROUND(OFFSET) \ + "vldr d31, [%[C], #" #OFFSET "] \n\t" \ + "veor d0, d0, d4 \n\t" \ + "veor d4, d4, d3 \n\t" \ + "veor d2, d2, d31 \n\t" \ + "vbic d13, d0, d4 \n\t" \ + "vbic d12, d4, d3 \n\t" \ + "veor d2, d2, d1 \n\t" \ + "vbic d14, d1, d0 \n\t" \ + "vbic d11, d3, d2 \n\t" \ + "vbic d10, d2, d1 \n\t" \ + "veor q0, q0, q5 \n\t" \ + "veor q1, q1, q6 \n\t" \ + "veor d4, d4, d14 \n\t" \ + "veor d1, d1, d0 \n\t" \ + "veor d3, d3, d2 \n\t" \ + "veor d0, d0, d4 \n\t" \ + "vsri.64 d14, d4, #7 \n\t" \ + "vsri.64 d24, d4, #41 \n\t" \ + "vsri.64 d11, d1, #39 \n\t" \ + "vsri.64 d21, d1, #61 \n\t" \ + "vsri.64 d10, d0, #19 \n\t" \ + "vsri.64 d20, d0, #28 \n\t" \ + "vsri.64 d12, d2, #1 \n\t" \ + "vsri.64 d22, d2, #6 \n\t" \ + "vsri.64 d13, d3, #10 \n\t" \ + "vsri.64 d23, d3, #17 \n\t" \ + "vsli.64 d10, d0, #45 \n\t" \ + "vsli.64 d20, d0, #36 \n\t" \ + "vsli.64 d11, d1, #25 \n\t" \ + "vsli.64 d21, d1, #3 \n\t" \ + "vsli.64 d12, d2, #63 \n\t" \ + "vsli.64 d22, d2, #58 \n\t" \ + "vsli.64 d13, d3, #54 \n\t" \ + "vsli.64 d23, d3, #47 \n\t" \ + "vsli.64 d14, d4, #57 \n\t" \ + "vsli.64 d24, d4, #23 \n\t" \ + "veor q5, q5, q0 \n\t" \ + "veor q6, q6, q1 \n\t" \ + "veor d14, d14, d4 \n\t" \ + "veor q0, q5, q10 \n\t" \ + "veor d4, d14, d24 \n\t" \ + "veor q1, q6, q11 \n\t" +/* clang-format on */ + +#define P12() \ + __asm__ __volatile__ ( \ + ".arm \n\t" \ + ".fpu neon \n\t" \ + "vldm %[s], {d0-d4} \n\t" \ + "vmvn d2, d2 \n\t" \ + ROUND(0) \ + ROUND(8) \ + ROUND(16) \ + ROUND(24) \ + ROUND(32) \ + ROUND(40) \ + ROUND(48) \ + ROUND(56) \ + ROUND(64) \ + ROUND(72) \ + ROUND(80) \ + ROUND(88) \ + "vmvn d2, d2 \n\t" \ + "vstm %[s], {d0-d4} \n\t" \ + :: [s] "r" (&s), [C] "r" (C) \ + : "d0", "d1", "d2", "d3", "d4", \ + 
"d10", "d11", "d12", "d13", "d14", \ + "d20", "d21", "d22", "d23", "d24", \ + "d31", "memory") + +#define P8() \ + __asm__ __volatile__ ( \ + ".arm \n\t" \ + ".fpu neon \n\t" \ + "vldm %[s], {d0-d4} \n\t" \ + "vmvn d2, d2 \n\t" \ + ROUND(32) \ + ROUND(40) \ + ROUND(48) \ + ROUND(56) \ + ROUND(64) \ + ROUND(72) \ + ROUND(80) \ + ROUND(88) \ + "vmvn d2, d2 \n\t" \ + "vstm %[s], {d0-d4} \n\t" \ + :: [s] "r" (&s), [C] "r" (C) \ + : "d0", "d1", "d2", "d3", "d4", \ + "d10", "d11", "d12", "d13", "d14", \ + "d20", "d21", "d22", "d23", "d24", \ + "d31", "memory") + +#define AD() \ + do { \ + uint32_t adlen_hi = (uint32_t)(adlen >> 32); \ + uint32_t adlen_lo = (uint32_t)adlen; \ + __asm__ __volatile__ ( \ + ".arm \n\t" \ + ".fpu neon \n\t" \ + "cmp %[adlen_hi], #0 \n\t" \ + "cmpeq %[adlen_lo], #15 \n\t" \ + "bls .LAD1 \n\t" \ + "vldm %[s], {d0-d4} \n\t" \ + ".LAD0: \n\t" \ + "vldm %[ad]!, {d16,d17} \n\t" \ + "vrev64.8 q8, q8 \n\t" \ + "veor q0, q0, q8 \n\t" \ + "vmvn d2, d2 \n\t" \ + ROUND(32) \ + ROUND(40) \ + ROUND(48) \ + ROUND(56) \ + ROUND(64) \ + ROUND(72) \ + ROUND(80) \ + ROUND(88) \ + "vmvn d2, d2 \n\t" \ + "subs %[adlen_lo], %[adlen_lo], #16 \n\t" \ + "sbc %[adlen_hi], %[adlen_hi], #0 \n\t" \ + "cmp %[adlen_hi], #0 \n\t" \ + "cmpeq %[adlen_lo], #15 \n\t" \ + "bhi .LAD0 \n\t" \ + "vstm %[s], {d0-d4} \n\t" \ + ".LAD1: \n\t" \ + : [adlen_hi] "+r" (adlen_hi), [adlen_lo] "+r" (adlen_lo), \ + [ad] "+r" (ad) \ + : [s] "r" (&s), [C] "r" (C) \ + : "d0", "d1", "d2", "d3", "d4", \ + "d10", "d11", "d12", "d13", "d14", "d16", "d17", \ + "d20", "d21", "d22", "d23", "d24", \ + "d31", "memory"); \ + adlen = (uint64_t)adlen_hi << 32 | adlen_lo; \ + } while (0) + +#define PT() \ + do { \ + uint32_t mlen_hi = (uint32_t)(mlen >> 32); \ + uint32_t mlen_lo = (uint32_t)mlen; \ + __asm__ __volatile__ ( \ + ".arm \n\t" \ + ".fpu neon \n\t" \ + "cmp %[mlen_hi], #0 \n\t" \ + "cmpeq %[mlen_lo], #15 \n\t" \ + "bls .LPT1 \n\t" \ + "vldm %[s], {d0-d4} \n\t" \ + ".LPT0: \n\t" \ + "vldm %[m]!, 
{d16,d17} \n\t" \ + "vrev64.8 q8, q8 \n\t" \ + "veor q0, q0, q8 \n\t" \ + "vrev64.8 q13, q0 \n\t" \ + "vstm %[c]!, {d26,d27} \n\t" \ + "vmvn d2, d2 \n\t" \ + ROUND(32) \ + ROUND(40) \ + ROUND(48) \ + ROUND(56) \ + ROUND(64) \ + ROUND(72) \ + ROUND(80) \ + ROUND(88) \ + "vmvn d2, d2 \n\t" \ + "subs %[mlen_lo], %[mlen_lo], #16 \n\t" \ + "sbc %[mlen_hi], %[mlen_hi], #0 \n\t" \ + "cmp %[mlen_hi], #0 \n\t" \ + "cmpeq %[mlen_lo], #15 \n\t" \ + "bhi .LPT0 \n\t" \ + "vstm %[s], {d0-d4} \n\t" \ + ".LPT1: \n\t" \ + : [mlen_hi] "+r" (mlen_hi), [mlen_lo] "+r" (mlen_lo), \ + [m] "+r" (m), [c] "+r" (c) \ + : [s] "r" (&s), [C] "r" (C) \ + : "d0", "d1", "d2", "d3", "d4", \ + "d10", "d11", "d12", "d13", "d14", "d16", "d17", \ + "d20", "d21", "d22", "d23", "d24", "d26", "d27", \ + "d31", "memory"); \ + mlen = (uint64_t)mlen_hi << 32 | mlen_lo; \ + } while (0) + +#define CT() \ + do { \ + uint32_t clen_hi = (uint32_t)(clen >> 32); \ + uint32_t clen_lo = (uint32_t)clen; \ + __asm__ __volatile__ ( \ + ".arm \n\t" \ + ".fpu neon \n\t" \ + "cmp %[clen_hi], #0 \n\t" \ + "cmpeq %[clen_lo], #15 \n\t" \ + "bls .LCT1 \n\t" \ + "vldm %[s], {d0-d4} \n\t" \ + ".LCT0: \n\t" \ + "vldm %[c]!, {d26,d27} \n\t" \ + "vrev64.8 q8, q0 \n\t" \ + "veor q8, q8, q13 \n\t" \ + "vrev64.8 q0, q13 \n\t" \ + "vstm %[m]!, {d16,d17} \n\t" \ + "vmvn d2, d2 \n\t" \ + ROUND(32) \ + ROUND(40) \ + ROUND(48) \ + ROUND(56) \ + ROUND(64) \ + ROUND(72) \ + ROUND(80) \ + ROUND(88) \ + "vmvn d2, d2 \n\t" \ + "subs %[clen_lo], %[clen_lo], #16 \n\t" \ + "sbc %[clen_hi], %[clen_hi], #0 \n\t" \ + "cmp %[clen_hi], #0 \n\t" \ + "cmpeq %[clen_lo], #15 \n\t" \ + "bhi .LCT0 \n\t" \ + "vstm %[s], {d0-d4} \n\t" \ + ".LCT1: \n\t" \ + : [clen_hi] "+r" (clen_hi), [clen_lo] "+r" (clen_lo), \ + [m] "+r" (m), [c] "+r" (c) \ + : [s] "r" (&s), [C] "r" (C) \ + : "d0", "d1", "d2", "d3", "d4", \ + "d10", "d11", "d12", "d13", "d14", "d16", "d17", \ + "d20", "d21", "d22", "d23", "d24", "d26", "d27", \ + "d31", "memory"); \ + clen = (uint64_t)clen_hi 
<< 32 | clen_lo; \ + } while (0) + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64/api.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64/api.h index a4aa567..951ee9c 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64/api.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64/api.h @@ -3,3 +3,4 @@ #define CRYPTO_NPUBBYTES 16 #define CRYPTO_ABYTES 16 #define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 16 diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64/ascon.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64/ascon.c new file mode 100644 index 0000000..9011a77 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64/ascon.c @@ -0,0 +1,250 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + XOR(*K1, LOAD64(k)); + XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t N0, N1; + word_t K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* initialization */ + PINIT(s); + XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + XOR(s->x1, K1); + XOR(s->x2, K2); + XOR(s->x3, N0); + XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block 
*/ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) XOR(*px, LOAD(ad, adlen)); + XOR(*px, PAD(adlen)); + PB(s); + } + XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + XOR(*px, cx); + STORE(m, *px, clen); + AND(*px, XMASK(clen)); + XOR(*px, cx); + } + XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + 
XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} + +#if !ASCON_INLINE_MODE + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + init(s, npub, k); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { final(s, k); } + +#else + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, k); + /* set tag */ + c += mlen; + STORE64(c, s.x3); + STORE64(c + 8, s.x4); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, 
clen); + FINAL(&s, k); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + XOR(s.x3, LOAD64(c)); + XOR(s.x4, LOAD64(c + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64/ascon.h new file mode 100644 index 0000000..10a5b6e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64/config.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64/config.h new file mode 100644 index 0000000..8d8a1a0 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 1 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'H' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline 
inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64/endian.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64/endian.h index ffbdcd9..3944360 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64/endian.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64/endian.h @@ -3,29 +3,37 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) #elif defined(_MSC_VER) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ - (((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ - (((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ - (((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) #define U32BIG(x) \ - ((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ - (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) -#define U16BIG(x) \ - ((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8)) + (((0x000000FF & (x)) << 24) | 
((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ - +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64/loadstore.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, 
bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64/permutations.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64/permutations.c new file mode 100644 index 0000000..3aecaa6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64/permutations.c @@ -0,0 +1,26 @@ +#include "permutations.h" + +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + 
+#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64/permutations.h index 4af1e2c..6172dd5 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64/permutations.h @@ -1,102 +1,184 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -typedef unsigned char u8; -typedef unsigned long long u64; - -typedef struct { - u64 x0, x1, x2, x3, x4; -} state; - -#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) -#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) - -#define ROUND(C) \ - do { \ - state t; \ - s.x2 ^= C; \ - s.x0 ^= s.x4; \ - s.x4 ^= s.x3; \ - s.x2 ^= s.x1; \ - t.x0 = s.x0; \ - t.x4 = s.x4; \ - t.x3 = s.x3; \ - t.x1 = s.x1; \ - t.x2 = s.x2; \ - s.x0 = t.x0 ^ ((~t.x1) & t.x2); \ - s.x2 = t.x2 ^ ((~t.x3) & t.x4); \ - s.x4 = t.x4 ^ ((~t.x0) & t.x1); \ - s.x1 = t.x1 ^ ((~t.x2) & t.x3); \ - s.x3 = t.x3 ^ ((~t.x4) & t.x0); \ - s.x1 ^= s.x0; \ - t.x1 = s.x1; \ - s.x1 = ROTR64(s.x1, 39); \ - s.x3 ^= s.x2; \ - t.x2 = s.x2; \ - s.x2 = ROTR64(s.x2, 1); \ - t.x4 = s.x4; \ - t.x2 ^= s.x2; \ - s.x2 = ROTR64(s.x2, 6 - 1); \ - t.x3 = s.x3; \ - t.x1 ^= s.x1; \ - s.x3 = ROTR64(s.x3, 10); \ - s.x0 ^= s.x4; \ - s.x4 = ROTR64(s.x4, 7); \ - t.x3 ^= s.x3; \ - s.x2 ^= t.x2; \ - s.x1 = ROTR64(s.x1, 61 - 39); \ - t.x0 = s.x0; \ - s.x2 = ~s.x2; \ - s.x3 = ROTR64(s.x3, 17 - 10); \ - t.x4 ^= s.x4; \ - s.x4 = ROTR64(s.x4, 41 - 7); \ - s.x3 ^= t.x3; \ - s.x1 ^= t.x1; \ - s.x0 = ROTR64(s.x0, 19); \ - s.x4 ^= t.x4; \ - t.x0 ^= s.x0; \ - s.x0 = ROTR64(s.x0, 28 - 19); \ - 
s.x0 ^= t.x0; \ - } while (0) - -#define P12() \ - do { \ - ROUND(0xf0); \ - ROUND(0xe1); \ - ROUND(0xd2); \ - ROUND(0xc3); \ - ROUND(0xb4); \ - ROUND(0xa5); \ - ROUND(0x96); \ - ROUND(0x87); \ - ROUND(0x78); \ - ROUND(0x69); \ - ROUND(0x5a); \ - ROUND(0x4b); \ - } while (0) - -#define P8() \ - do { \ - ROUND(0xb4); \ - ROUND(0xa5); \ - ROUND(0x96); \ - ROUND(0x87); \ - ROUND(0x78); \ - ROUND(0x69); \ - ROUND(0x5a); \ - ROUND(0x4b); \ - } while (0) - -#define P6() \ - do { \ - ROUND(0x96); \ - ROUND(0x87); \ - ROUND(0x78); \ - ROUND(0x69); \ - ROUND(0x5a); \ - ROUND(0x4b); \ - } while (0) - -#endif // PERMUTATIONS_H_ +#include +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 
U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif + 
+#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64/round.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64/round.h new file mode 100644 index 0000000..077cbfd --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64/round.h @@ -0,0 +1,69 @@ +#ifndef 
ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint64_t C) { + state_t t; + s->x2 ^= C; + s->x0 ^= s->x4; + s->x4 ^= s->x3; + s->x2 ^= s->x1; + t.x0 = s->x0; + t.x4 = s->x4; + t.x3 = s->x3; + t.x1 = s->x1; + t.x2 = s->x2; + s->x0 = t.x0 ^ (~t.x1 & t.x2); + s->x2 = t.x2 ^ (~t.x3 & t.x4); + s->x4 = t.x4 ^ (~t.x0 & t.x1); + s->x1 = t.x1 ^ (~t.x2 & t.x3); + s->x3 = t.x3 ^ (~t.x4 & t.x0); + s->x1 ^= s->x0; + t.x1 = s->x1; + s->x1 = ROR64(s->x1, 39); + s->x3 ^= s->x2; + t.x2 = s->x2; + s->x2 = ROR64(s->x2, 1); + t.x4 = s->x4; + t.x2 ^= s->x2; + s->x2 = ROR64(s->x2, 6 - 1); + t.x3 = s->x3; + t.x1 ^= s->x1; + s->x3 = ROR64(s->x3, 10); + s->x0 ^= s->x4; + s->x4 = ROR64(s->x4, 7); + t.x3 ^= s->x3; + s->x2 ^= t.x2; + s->x1 = ROR64(s->x1, 61 - 39); + t.x0 = s->x0; + s->x2 = ~s->x2; + s->x3 = ROR64(s->x3, 17 - 10); + t.x4 ^= s->x4; + s->x4 = ROR64(s->x4, 41 - 7); + s->x3 ^= t.x3; + s->x1 ^= t.x1; + s->x0 = ROR64(s->x0, 19); + s->x4 ^= t.x4; + t.x0 ^= s->x0; + s->x0 = ROR64(s->x0, 28 - 19); + s->x0 ^= t.x0; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64/word.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64/word.h new file mode 100644 index 0000000..5d601bb --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64/word.h @@ -0,0 +1,47 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef uint64_t word_t; + +#define WORD_T +#define UINT64_T + +#define U64TOWORD +#define WORDTOU64 + +#define XOR(a, b) \ + do { \ + (a) ^= (b); \ + } while (0) + +#define AND(a, b) \ + do { \ + (a) &= (b); \ + } while (0) + 
+__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + return lo2hi << 32 | hi2lo >> 32; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/api.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/api.h new file mode 100644 index 0000000..951ee9c --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 16 diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/ascon.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/ascon.c new file mode 100644 index 0000000..9011a77 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/ascon.c @@ -0,0 +1,250 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + XOR(*K1, LOAD64(k)); + XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t N0, N1; + word_t K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + 
/* load key */ + loadkey(&K0, &K1, &K2, k); + /* initialization */ + PINIT(s); + XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + XOR(s->x1, K1); + XOR(s->x2, K2); + XOR(s->x3, N0); + XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) XOR(*px, LOAD(ad, adlen)); + XOR(*px, PAD(adlen)); + PB(s); + } + XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + XOR(s->x1, cx); + STORE64(m + 8, 
s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + XOR(*px, cx); + STORE(m, *px, clen); + AND(*px, XMASK(clen)); + XOR(*px, cx); + } + XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} + +#if !ASCON_INLINE_MODE + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + init(s, npub, k); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { final(s, k); } + +#else + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const 
uint8_t* k) { + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, k); + /* set tag */ + c += mlen; + STORE64(c, s.x3); + STORE64(c + 8, s.x4); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, k); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + XOR(s.x3, LOAD64(c)); + XOR(s.x4, LOAD64(c + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/ascon.h new file mode 100644 index 0000000..d11fa01 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/ascon.h @@ -0,0 +1,20 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; + word_t rx; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/config.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/config.h new file mode 100644 index 0000000..87b8b74 --- /dev/null +++ 
b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/config.h @@ -0,0 +1,44 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 1 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'H' +#endif + +/* Ascon mask generator { 'S'tdlib, 'X'orshift, 'R'andombytes } */ +#ifndef ASCON_MASK_RNG +#define ASCON_MASK_RNG 'X' +#endif + +/* mask key/data loads */ +#ifndef ASCON_MASK_LOADS +#define ASCON_MASK_LOADS 0 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/endian.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + 
((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/implementors b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/loadstore.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = 
WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = 
x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/permutations.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/permutations.c new file mode 100644 index 0000000..3aecaa6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/permutations.c @@ -0,0 +1,26 @@ +#include "permutations.h" + +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/permutations.h new file mode 100644 index 0000000..6172dd5 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/permutations.h @@ -0,0 +1,184 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + 
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + 
ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) 
+#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/random.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/random.c new file mode 100644 index 0000000..81d0081 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/random.c @@ -0,0 +1,62 @@ + +#include "random.h" + +#include +#include + +#include "config.h" + +#if ASCON_MASK_RNG == 'R' + +void randinit() { srand(time(0)); } + +uint32_t rand32() { + uint32_t r; + randombytes(&r, 4); + return r; +} + +uint64_t rand64() { + uint64_t r; + randombytes(&r, 8); + return r; +} + +#elif ASCON_MASK_RNG == 'S' + +void randinit() { srand(time(0)); } + +uint32_t rand32() { return ((uint32_t)rand() << 21) ^ rand(); } + +uint64_t rand64() { + return ((uint64_t)rand() << 43) ^ ((uint64_t)rand() << 21) ^ rand(); +} + +#elif ASCON_MASK_RNG == 'X' + +uint32_t xorshift32; +uint64_t xorshift64; + +void randinit() { + srand(time(0)); + xorshift32 = rand(); + xorshift64 = (uint64_t)rand() << 32 | rand(); +} + +uint32_t rand32() { + uint32_t x = xorshift32; + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + return xorshift32 = x; +} + +uint64_t rand64() { + uint64_t x = xorshift64; + x ^= x << 13; + x ^= x >> 7; + x ^= x << 17; + return xorshift64 = x; +} + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/random.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/random.h new file mode 100644 index 0000000..89226b6 --- /dev/null +++ 
b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/random.h @@ -0,0 +1,10 @@ +#ifndef RANDOM_H_ +#define RANDOM_H_ + +#include + +void randinit(); +uint32_t rand32(); +uint64_t rand64(); + +#endif /* RANDOM_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/round.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/round.h new file mode 100644 index 0000000..4b27aaa --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/round.h @@ -0,0 +1,75 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" +#include "random.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = TOSHARES(0); + *K1 = TOSHARES(0); + *K2 = TOSHARES(0); +} + +__forceinline void PINIT(state_t* s) { + randinit(); + s->x0 = TOSHARES(0); + s->x1 = TOSHARES(0); + s->x2 = TOSHARES(0); + s->x3 = TOSHARES(0); + s->x4 = TOSHARES(0); + s->rx = TOSHARES(0); +} + +#define TOFFOLI(a0, a1, b0, b1, c0, c1) \ + do { \ + (a0) ^= (~(b0)) & (c1); \ + (a0) ^= (~(b0)) & (c0); \ + (a1) ^= (b1) & (c1); \ + (a1) ^= (b1) & (c0); \ + } while (0) + +__forceinline void ROUND(state_t* s, uint64_t C) { + /* refresh randomness */ + /* s->rx = TOSHARES(0); */ + /* addition of round constant */ + s->x2.s0 ^= C; + /* substitution layer */ + s->x0.s0 ^= s->x4.s0; + s->x4.s0 ^= s->x3.s0; + s->x2.s0 ^= s->x1.s0; + s->x0.s1 ^= s->x4.s1; + s->x4.s1 ^= s->x3.s1; + s->x2.s1 ^= s->x1.s1; + /* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */ + s->rx.s0 = s->rx.s1; + TOFFOLI(s->rx.s0, s->rx.s1, s->x4.s0, s->x4.s1, s->x0.s0, s->x0.s1); + TOFFOLI(s->x0.s0, s->x0.s1, s->x1.s0, s->x1.s1, s->x2.s0, s->x2.s1); + TOFFOLI(s->x2.s0, s->x2.s1, s->x3.s0, s->x3.s1, s->x4.s0, s->x4.s1); + TOFFOLI(s->x4.s0, s->x4.s1, s->x0.s0, s->x0.s1, s->x1.s0, s->x1.s1); + TOFFOLI(s->x1.s0, s->x1.s1, s->x2.s0, s->x2.s1, s->x3.s0, s->x3.s1); + s->x3.s1 ^= s->rx.s1; + s->x3.s0 ^= s->rx.s0; + /* end of shared keccak 
s-box */ + s->x1.s0 ^= s->x0.s0; + s->x0.s0 ^= s->x4.s0; + s->x3.s0 ^= s->x2.s0; + s->x2.s0 = ~s->x2.s0; + s->x1.s1 ^= s->x0.s1; + s->x0.s1 ^= s->x4.s1; + s->x3.s1 ^= s->x2.s1; + /* linear diffusion layer */ + s->x0.s1 ^= ROR64(s->x0.s1, 19) ^ ROR64(s->x0.s1, 28); + s->x1.s1 ^= ROR64(s->x1.s1, 61) ^ ROR64(s->x1.s1, 39); + s->x2.s1 ^= ROR64(s->x2.s1, 1) ^ ROR64(s->x2.s1, 6); + s->x3.s1 ^= ROR64(s->x3.s1, 10) ^ ROR64(s->x3.s1, 17); + s->x4.s1 ^= ROR64(s->x4.s1, 7) ^ ROR64(s->x4.s1, 41); + s->x0.s0 ^= ROR64(s->x0.s0, 19) ^ ROR64(s->x0.s0, 28); + s->x1.s0 ^= ROR64(s->x1.s0, 61) ^ ROR64(s->x1.s0, 39); + s->x2.s0 ^= ROR64(s->x2.s0, 1) ^ ROR64(s->x2.s0, 6); + s->x3.s0 ^= ROR64(s->x3.s0, 10) ^ ROR64(s->x3.s0, 17); + s->x4.s0 ^= ROR64(s->x4.s0, 7) ^ ROR64(s->x4.s0, 41); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/word.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/word.h new file mode 100644 index 0000000..6f1effc --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_2shares/word.h @@ -0,0 +1,80 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" +#include "random.h" + +typedef uint64_t share_t; + +typedef struct { + share_t s0; + share_t s1; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { return (word_t){x, 0}; } + +__forceinline uint64_t UINT64_T(word_t w) { return w.s0; } + +__forceinline word_t TOSHARES(share_t in) { + uint64_t r0 = rand64(); + return (word_t){in ^ r0, r0}; +} + +__forceinline share_t FROMSHARES(word_t in) { return in.s0 ^ in.s1; } + +__forceinline word_t U64TOWORD(uint64_t x) { +#if ASCON_MASK_LOADS + return TOSHARES(x); +#else + return WORD_T(x); +#endif +} + +__forceinline uint64_t WORDTOU64(word_t w) { return FROMSHARES(w); } + +#define XOR(a, b) \ + do { \ + word_t t = b; \ + (a).s0 ^= t.s0; \ + (a).s1 ^= t.s1; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t ta = a; \ + word_t tb = b; 
\ + (a).s0 = (ta.s0 & tb.s0) ^ (ta.s0 & tb.s1); \ + (a).s1 = (ta.s1 & tb.s0) ^ (ta.s1 & tb.s1); \ + } while (0) + +__forceinline uint64_t ROR64(uint64_t x, int n) { + return x >> n | x << (64 - n); +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.s0 = lo2hi.s0 << 32 | hi2lo.s0 >> 32; + r.s1 = lo2hi.s1 << 32 | hi2lo.s1 >> 32; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) + result |= ((uint8_t*)&(a.s0))[i] ^ ((uint8_t*)&(a.s1))[i]; + for (int i = 0; i < 8; ++i) + result |= ((uint8_t*)&(b.s0))[i] ^ ((uint8_t*)&(b.s1))[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/api.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/api.h new file mode 100644 index 0000000..951ee9c --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 16 diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/ascon.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/ascon.c new file mode 100644 index 0000000..9011a77 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/ascon.c @@ -0,0 +1,250 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 
4))); + k += 4; + } + XOR(*K1, LOAD64(k)); + XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t N0, N1; + word_t K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* initialization */ + PINIT(s); + XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + XOR(s->x1, K1); + XOR(s->x2, K2); + XOR(s->x3, N0); + XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) XOR(*px, LOAD(ad, adlen)); + XOR(*px, PAD(adlen)); + PB(s); + } + XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t 
clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + XOR(*px, cx); + STORE(m, *px, clen); + AND(*px, XMASK(clen)); + XOR(*px, cx); + } + XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} + +#if !ASCON_INLINE_MODE + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + init(s, npub, k); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { final(s, k); } + +#else + +#define INIT init +#define ABSORB absorb 
+#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, k); + /* set tag */ + c += mlen; + STORE64(c, s.x3); + STORE64(c + 8, s.x4); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, k); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + XOR(s.x3, LOAD64(c)); + XOR(s.x4, LOAD64(c + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/ascon.h new file mode 100644 index 0000000..d11fa01 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/ascon.h @@ -0,0 +1,20 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; + word_t rx; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + 
+#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/config.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/config.h new file mode 100644 index 0000000..87b8b74 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/config.h @@ -0,0 +1,44 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 1 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'H' +#endif + +/* Ascon mask generator { 'S'tdlib, 'X'orshift, 'R'andombytes } */ +#ifndef ASCON_MASK_RNG +#define ASCON_MASK_RNG 'X' +#endif + +/* mask key/data loads */ +#ifndef ASCON_MASK_LOADS +#define ASCON_MASK_LOADS 0 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/endian.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == 
__ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/implementors b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/loadstore.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise 
data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return 
U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/permutations.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/permutations.c new file mode 100644 index 0000000..3aecaa6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/permutations.c @@ -0,0 +1,26 @@ +#include "permutations.h" + +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/permutations.h new file mode 100644 index 0000000..6172dd5 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/permutations.h @@ -0,0 +1,184 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + 
U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS + +__forceinline void 
P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/printstate.h new file mode 100644 index 
0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/random.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/random.c new file mode 100644 index 0000000..81d0081 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/random.c @@ -0,0 +1,62 @@ + +#include "random.h" + +#include +#include + +#include "config.h" + +#if ASCON_MASK_RNG == 'R' + +void randinit() { srand(time(0)); } + +uint32_t rand32() { + uint32_t r; + randombytes(&r, 4); + return r; +} + +uint64_t rand64() { + uint64_t r; + randombytes(&r, 8); + return r; +} + +#elif ASCON_MASK_RNG == 'S' + +void randinit() { srand(time(0)); } + +uint32_t rand32() { return ((uint32_t)rand() << 21) ^ rand(); } + +uint64_t rand64() { + return ((uint64_t)rand() << 43) ^ ((uint64_t)rand() << 21) ^ rand(); +} + +#elif ASCON_MASK_RNG == 'X' + +uint32_t xorshift32; +uint64_t xorshift64; + +void randinit() { + srand(time(0)); + xorshift32 = rand(); + xorshift64 = (uint64_t)rand() << 32 | rand(); +} + +uint32_t rand32() { + uint32_t x = xorshift32; + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + return xorshift32 = x; +} + +uint64_t rand64() { + uint64_t x = xorshift64; + x ^= x << 13; + x ^= x >> 7; + x ^= x << 17; + return xorshift64 = x; +} + +#endif diff --git 
a/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/random.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/random.h new file mode 100644 index 0000000..89226b6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/random.h @@ -0,0 +1,10 @@ +#ifndef RANDOM_H_ +#define RANDOM_H_ + +#include + +void randinit(); +uint32_t rand32(); +uint64_t rand64(); + +#endif /* RANDOM_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/round.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/round.h new file mode 100644 index 0000000..ef5ed5e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/round.h @@ -0,0 +1,98 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" +#include "random.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = TOSHARES(0); + *K1 = TOSHARES(0); + *K2 = TOSHARES(0); +} + +__forceinline void PINIT(state_t* s) { + randinit(); + s->x0 = TOSHARES(0); + s->x1 = TOSHARES(0); + s->x2 = TOSHARES(0); + s->x3 = TOSHARES(0); + s->x4 = TOSHARES(0); + s->rx = TOSHARES(0); +} + +#define TOFFOLI(a0, a1, a2, b0, b1, b2, c0, c1, c2) \ + do { \ + (a0) ^= (~(b0)) & (c0); \ + (a0) ^= (b0) & (c2); \ + (a0) ^= (b2) & (c0); \ + (a1) ^= (~(b1)) & (c1); \ + (a1) ^= (b1) & (c0); \ + (a1) ^= (b0) & (c1); \ + (a2) ^= (~(b2)) & (c2); \ + (a2) ^= (b2) & (c1); \ + (a2) ^= (b1) & (c2); \ + } while (0) + +__forceinline void ROUND(state_t* s, uint64_t C) { + /* refresh randomness */ + /* s->rx = TOSHARES(0); */ + /* addition of round constant */ + s->x2.s0 ^= C; + /* substitution layer */ + s->x0.s0 ^= s->x4.s0; + s->x4.s0 ^= s->x3.s0; + s->x2.s0 ^= s->x1.s0; + s->x0.s1 ^= s->x4.s1; + s->x4.s1 ^= s->x3.s1; + s->x2.s1 ^= s->x1.s1; + s->x0.s2 ^= s->x4.s2; + s->x4.s2 ^= s->x3.s2; + s->x2.s2 ^= s->x1.s2; + /* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */ + s->rx.s2 = s->rx.s0; + s->rx.s0 ^= 
s->rx.s1; + TOFFOLI(s->rx.s0, s->rx.s1, s->rx.s2, s->x4.s0, s->x4.s1, s->x4.s2, s->x0.s0, + s->x0.s1, s->x0.s2); + TOFFOLI(s->x0.s0, s->x0.s1, s->x0.s2, s->x1.s0, s->x1.s1, s->x1.s2, s->x2.s0, + s->x2.s1, s->x2.s2); + TOFFOLI(s->x2.s0, s->x2.s1, s->x2.s2, s->x3.s0, s->x3.s1, s->x3.s2, s->x4.s0, + s->x4.s1, s->x4.s2); + TOFFOLI(s->x4.s0, s->x4.s1, s->x4.s2, s->x0.s0, s->x0.s1, s->x0.s2, s->x1.s0, + s->x1.s1, s->x1.s2); + TOFFOLI(s->x1.s0, s->x1.s1, s->x1.s2, s->x2.s0, s->x2.s1, s->x2.s2, s->x3.s0, + s->x3.s1, s->x3.s2); + s->x3.s2 ^= s->rx.s2; + s->x3.s1 ^= s->rx.s1; + s->x3.s0 ^= s->rx.s0; + /* end of shared keccak s-box */ + s->x1.s0 ^= s->x0.s0; + s->x0.s0 ^= s->x4.s0; + s->x3.s0 ^= s->x2.s0; + s->x2.s0 = ~s->x2.s0; + s->x1.s1 ^= s->x0.s1; + s->x0.s1 ^= s->x4.s1; + s->x3.s1 ^= s->x2.s1; + s->x1.s2 ^= s->x0.s2; + s->x0.s2 ^= s->x4.s2; + s->x3.s2 ^= s->x2.s2; + /* linear diffusion layer */ + s->x0.s2 ^= ROR64(s->x0.s2, 19) ^ ROR64(s->x0.s2, 28); + s->x1.s2 ^= ROR64(s->x1.s2, 61) ^ ROR64(s->x1.s2, 39); + s->x2.s2 ^= ROR64(s->x2.s2, 1) ^ ROR64(s->x2.s2, 6); + s->x3.s2 ^= ROR64(s->x3.s2, 10) ^ ROR64(s->x3.s2, 17); + s->x4.s2 ^= ROR64(s->x4.s2, 7) ^ ROR64(s->x4.s2, 41); + s->x0.s1 ^= ROR64(s->x0.s1, 19) ^ ROR64(s->x0.s1, 28); + s->x1.s1 ^= ROR64(s->x1.s1, 61) ^ ROR64(s->x1.s1, 39); + s->x2.s1 ^= ROR64(s->x2.s1, 1) ^ ROR64(s->x2.s1, 6); + s->x3.s1 ^= ROR64(s->x3.s1, 10) ^ ROR64(s->x3.s1, 17); + s->x4.s1 ^= ROR64(s->x4.s1, 7) ^ ROR64(s->x4.s1, 41); + s->x0.s0 ^= ROR64(s->x0.s0, 19) ^ ROR64(s->x0.s0, 28); + s->x1.s0 ^= ROR64(s->x1.s0, 61) ^ ROR64(s->x1.s0, 39); + s->x2.s0 ^= ROR64(s->x2.s0, 1) ^ ROR64(s->x2.s0, 6); + s->x3.s0 ^= ROR64(s->x3.s0, 10) ^ ROR64(s->x3.s0, 17); + s->x4.s0 ^= ROR64(s->x4.s0, 7) ^ ROR64(s->x4.s0, 41); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/word.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/word.h new file mode 100644 index 
0000000..b18e527 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_3shares/word.h @@ -0,0 +1,87 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" +#include "random.h" + +typedef uint64_t share_t; + +typedef struct { + share_t s0; + share_t s1; + share_t s2; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { return (word_t){x, 0, 0}; } + +__forceinline uint64_t UINT64_T(word_t w) { return w.s0; } + +__forceinline word_t TOSHARES(share_t in) { + uint64_t r0 = rand64(); + uint64_t r1 = rand64(); + return (word_t){in ^ r0 ^ r1, r0, r1}; +} + +__forceinline share_t FROMSHARES(word_t in) { return in.s0 ^ in.s1 ^ in.s2; } + +__forceinline word_t U64TOWORD(uint64_t x) { +#if ASCON_MASK_LOADS + return TOSHARES(x); +#else + return WORD_T(x); +#endif +} + +__forceinline uint64_t WORDTOU64(word_t w) { return FROMSHARES(w); } + +#define XOR(a, b) \ + do { \ + word_t t = b; \ + (a).s0 ^= t.s0; \ + (a).s1 ^= t.s1; \ + (a).s2 ^= t.s2; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t ta = a; \ + word_t tb = b; \ + (a).s0 = (ta.s0 & tb.s0) ^ (ta.s0 & tb.s1) ^ (ta.s0 & tb.s2); \ + (a).s1 = (ta.s1 & tb.s0) ^ (ta.s1 & tb.s1) ^ (ta.s1 & tb.s2); \ + (a).s2 = (ta.s2 & tb.s0) ^ (ta.s2 & tb.s1) ^ (ta.s2 & tb.s2); \ + } while (0) + +__forceinline uint64_t ROR64(uint64_t x, int n) { + return x >> n | x << (64 - n); +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.s0 = lo2hi.s0 << 32 | hi2lo.s0 >> 32; + r.s1 = lo2hi.s1 << 32 | hi2lo.s1 >> 32; + r.s2 = lo2hi.s2 << 32 | hi2lo.s2 >> 32; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) + result |= ((uint8_t*)&(a.s0))[i] ^ ((uint8_t*)&(a.s1))[i] ^ + ((uint8_t*)&(a.s2))[i]; + for (int i = 0; i < 8; ++i) + result |= ((uint8_t*)&(b.s0))[i] ^ ((uint8_t*)&(b.s1))[i] ^ + ((uint8_t*)&(b.s2))[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { return 
WORD_T(0x80ull << (56 - 8 * i)); } + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/api.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/api.h index a4aa567..951ee9c 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/api.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/api.h @@ -3,3 +3,4 @@ #define CRYPTO_NPUBBYTES 16 #define CRYPTO_ABYTES 16 #define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 16 diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/ascon.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/ascon.c new file mode 100644 index 0000000..5e60f1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/ascon.c @@ -0,0 +1,63 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode) { + word_t N0, N1, K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + if (CRYPTO_KEYBYTES == 20) { + K0 = KEYROT(WORD_T(0), LOAD(k, 4)); + k += 4; + } + K1 = LOAD64(k); + K2 = LOAD64(k + 8); + /* initialization */ + s->x0 = IV; + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + s->x1 = K1; + s->x2 = K2; + s->x3 = N0; + s->x4 = N1; + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); + /* process associated data */ + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_AD); + PB(s); + } + XOR(s->x4, WORD_T(1)); + /* process 
plaintext/ciphertext */ + process_data(s, out, in, tlen, mode); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/ascon.h new file mode 100644 index 0000000..aa685d3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/ascon.h @@ -0,0 +1,24 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +#define ASCON_AD 0 +#define ASCON_ENC 1 +#define ASCON_DEC 2 + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/config.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/config.h new file mode 100644 index 0000000..ec8bd6f --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the permutation loops */ +#ifndef 
ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'H' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/decrypt.c index 0cde81e..605bd03 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/decrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/decrypt.c @@ -1,29 +1,32 @@ -#include "core.h" +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" -int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, - unsigned char* nsec, const unsigned char* c, - unsigned long long clen, const unsigned char* ad, - unsigned long long adlen, const unsigned char* npub, - const unsigned char* k) { +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { if (clen < CRYPTO_ABYTES) { *mlen = 0; return -1; } - - state s; + state_t s; (void)nsec; - - // set plaintext size + /* set plaintext size */ *mlen = clen - CRYPTO_ABYTES; - + /* ascon decryption */ ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); - - // verify tag (should be constant time, check compiler output) - if (((s.x3 ^ U64BIG(*(u64*)(c + *mlen))) | - (s.x4 ^ U64BIG(*(u64*)(c + *mlen + 8)))) != 0) { + /* verify tag (should be constant time, check compiler output) */ + XOR(s.x3, LOAD64(c + *mlen)); + XOR(s.x4, 
LOAD64(c + *mlen + 8)); + if (NOTZERO(s.x3, s.x4)) { *mlen = 0; return -1; } - return 0; } diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/encrypt.c index 5961c60..641b266 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/encrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/encrypt.c @@ -1,21 +1,25 @@ -#include "core.h" +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" -int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, - const unsigned char* m, unsigned long long mlen, - const unsigned char* ad, unsigned long long adlen, - const unsigned char* nsec, const unsigned char* npub, - const unsigned char* k) { - state s; - (void)nsec; +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); - // set ciphertext size +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + /* set ciphertext size */ *clen = mlen + CRYPTO_ABYTES; - + /* ascon encryption */ ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); - - // set tag - *(u64*)(c + mlen) = U64BIG(s.x3); - *(u64*)(c + mlen + 8) = U64BIG(s.x4); - + /* set tag */ + STORE64(c + mlen, s.x3); + STORE64(c + mlen + 8, s.x4); return 0; } diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/endian.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/endian.h index b4d18f5..3944360 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/endian.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/endian.h @@ -3,7 +3,10 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ 
== __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) @@ -11,19 +14,26 @@ #elif defined(_MSC_VER) || \ (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \ - (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \ - (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \ - (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56)) -#define U32BIG(x) \ - ((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \ - (((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24)) -#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/loadstore.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/loadstore.h new file mode 100644 index 
0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 
8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.c index 9aaf9d1..3aecaa6 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.c +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.c @@ -1,8 +1,26 @@ #include "permutations.h" -void P(state *p, u8 rounds) { - state s = *p; - u8 i, start = START_CONSTANT(rounds); - for (i = start; i > 0x4a; i -= 0x0f) ROUND(i); - *p = s; +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); } + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); 
} +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.h index 7143e82..6172dd5 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/permutations.h @@ -1,66 +1,184 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -typedef unsigned char u8; -typedef unsigned long long u64; - -typedef struct { - u64 x0, x1, x2, x3, x4; -} state; - -#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) -#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) -#define START_CONSTANT(x) (((0xf - (12 - (x))) << 4) | (12 - (x))) - -#define ROUND(C) \ - do { \ - state t; \ - s.x2 ^= C; \ - s.x0 ^= s.x4; \ - s.x4 ^= s.x3; \ - s.x2 ^= s.x1; \ - t.x0 = s.x0; \ - t.x4 = s.x4; \ - t.x3 = s.x3; \ - t.x1 = s.x1; \ - t.x2 = s.x2; \ - s.x0 = t.x0 ^ ((~t.x1) & t.x2); \ - s.x2 = t.x2 ^ ((~t.x3) & t.x4); \ - s.x4 = t.x4 ^ ((~t.x0) & t.x1); \ - s.x1 = t.x1 ^ ((~t.x2) & t.x3); \ - s.x3 = t.x3 ^ ((~t.x4) & t.x0); \ - s.x1 ^= s.x0; \ - t.x1 = s.x1; \ - s.x1 = ROTR64(s.x1, 39); \ - s.x3 ^= s.x2; \ - t.x2 = s.x2; \ - s.x2 = ROTR64(s.x2, 1); \ - t.x4 = s.x4; \ - t.x2 ^= s.x2; \ - s.x2 = ROTR64(s.x2, 6 - 1); \ - t.x3 = s.x3; \ - t.x1 ^= s.x1; \ - s.x3 = ROTR64(s.x3, 10); \ - s.x0 ^= s.x4; \ - s.x4 = ROTR64(s.x4, 7); \ - t.x3 ^= s.x3; \ - s.x2 ^= t.x2; \ - s.x1 = ROTR64(s.x1, 61 - 39); \ - t.x0 = s.x0; \ - s.x2 = ~s.x2; \ - s.x3 = ROTR64(s.x3, 17 - 10); \ - t.x4 ^= s.x4; \ - s.x4 = ROTR64(s.x4, 41 - 7); \ - s.x3 ^= t.x3; \ - s.x1 ^= t.x1; \ - s.x0 = ROTR64(s.x0, 19); \ - s.x4 ^= t.x4; \ - t.x0 ^= s.x0; \ - s.x0 = ROTR64(s.x0, 28 - 19); \ - s.x0 ^= t.x0; \ - } while (0) - -void P(state *p, u8 rounds); - -#endif // PERMUTATIONS_H_ +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include 
"round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV 
ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void 
P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/process.c b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/process.c new file mode 100644 index 0000000..2ec9d1f --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/process.c @@ -0,0 +1,63 @@ +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode) { + word_t tmp0, tmp1; + uint64_t t0, t1; + uint64_t i; + + while (len >= ASCON_RATE) { + tmp0 = LOAD64(in); + tmp1 = LOAD64(in + 8); + XOR(s->x0, tmp0); + XOR(s->x1, tmp1); + if (mode != ASCON_AD) { + STORE64(out, s->x0); + STORE64(out + 8, s->x1); + } + if (mode == ASCON_DEC) { + s->x0 = tmp0; + s->x1 = tmp1; + } + PB(s); + in += ASCON_RATE; + out += ASCON_RATE; + len -= ASCON_RATE; + } + + if (len) { + tmp1 = WORD_T(0); + if (len >= 8) tmp0 = 
LOAD64(in); + if (len > 8) + tmp1 = LOAD(in + 8, len - 8); + else + tmp0 = LOAD(in, len); + XOR(s->x0, tmp0); + XOR(s->x1, tmp1); + if (mode != ASCON_AD) { + if (len >= 8) STORE64(out, s->x0); + if (len > 8) + STORE(out + 8, s->x1, len - 8); + else + STORE(out, s->x0, len); + } + if (mode == ASCON_DEC) { + if (len >= 8) s->x0 = tmp0; + if (len > 8) { + AND(s->x1, XMASK(len - 8)); + XOR(s->x1, tmp1); + } else { + AND(s->x0, XMASK(len)); + XOR(s->x0, tmp0); + } + } + } + if (len < 8) + XOR(s->x0, PAD(len % 8)); + else + XOR(s->x1, PAD(len % 8)); +} diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/round.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/round.h new file mode 100644 index 0000000..077cbfd --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/round.h @@ -0,0 +1,69 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint64_t C) { + state_t t; + s->x2 ^= C; + s->x0 ^= s->x4; + s->x4 ^= s->x3; + s->x2 ^= s->x1; + t.x0 = s->x0; + t.x4 = s->x4; + t.x3 = s->x3; + t.x1 = s->x1; + t.x2 = s->x2; + s->x0 = t.x0 ^ (~t.x1 & t.x2); + s->x2 = t.x2 ^ (~t.x3 & t.x4); + s->x4 = t.x4 ^ (~t.x0 & t.x1); + s->x1 = t.x1 ^ (~t.x2 & t.x3); + s->x3 = t.x3 ^ (~t.x4 & t.x0); + s->x1 ^= s->x0; + t.x1 = s->x1; + s->x1 = ROR64(s->x1, 39); + s->x3 ^= s->x2; + t.x2 = s->x2; + s->x2 = ROR64(s->x2, 1); + t.x4 = s->x4; + t.x2 ^= s->x2; + s->x2 = ROR64(s->x2, 6 - 1); + t.x3 = s->x3; + t.x1 ^= s->x1; + s->x3 = ROR64(s->x3, 10); + s->x0 ^= s->x4; + s->x4 = ROR64(s->x4, 7); + t.x3 ^= s->x3; + s->x2 ^= t.x2; + s->x1 = ROR64(s->x1, 61 - 39); + t.x0 = s->x0; + s->x2 = ~s->x2; + s->x3 = 
ROR64(s->x3, 17 - 10); + t.x4 ^= s->x4; + s->x4 = ROR64(s->x4, 41 - 7); + s->x3 ^= t.x3; + s->x1 ^= t.x1; + s->x0 = ROR64(s->x0, 19); + s->x4 ^= t.x4; + t.x0 ^= s->x0; + s->x0 = ROR64(s->x0, 28 - 19); + s->x0 ^= t.x0; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/word.h b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/word.h new file mode 100644 index 0000000..5d601bb --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/opt64_lowsize/word.h @@ -0,0 +1,47 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef uint64_t word_t; + +#define WORD_T +#define UINT64_T + +#define U64TOWORD +#define WORDTOU64 + +#define XOR(a, b) \ + do { \ + (a) ^= (b); \ + } while (0) + +#define AND(a, b) \ + do { \ + (a) &= (b); \ + } while (0) + +__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + return lo2hi << 32 | hi2lo >> 32; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/ref/ascon.h b/ascon/Implementations/crypto_aead/ascon128av12/ref/ascon.h new file mode 100644 index 0000000..8ab0502 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/ref/ascon.h @@ -0,0 +1,14 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#define WORDTOU64 + +typedef uint64_t word_t; + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + 
+#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/ref/decrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/ref/decrypt.c index ee7a06f..daafb5e 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/ref/decrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128av12/ref/decrypt.c @@ -1,115 +1,111 @@ #include "api.h" +#include "ascon.h" +#include "loadstore.h" #include "permutations.h" +#include "printstate.h" -#define RATE (128 / 8) -#define PA_ROUNDS 12 -#define PB_ROUNDS 8 -#define IV \ - ((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \ - (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32) +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + uint64_t K0, K1, N0, N1; + state_t s; + (void)nsec; -int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, - unsigned char* nsec, const unsigned char* c, - unsigned long long clen, const unsigned char* ad, - unsigned long long adlen, const unsigned char* npub, - const unsigned char* k) { if (clen < CRYPTO_ABYTES) { *mlen = 0; return -1; } - const u64 K0 = BYTES_TO_U64(k, 8); - const u64 K1 = BYTES_TO_U64(k + 8, 8); - const u64 N0 = BYTES_TO_U64(npub, 8); - const u64 N1 = BYTES_TO_U64(npub + 8, 8); - state s; - u64 c0, c1; - (void)nsec; - - // set plaintext size + /* set plaintext size */ *mlen = clen - CRYPTO_ABYTES; - // initialization - s.x0 = IV; + /* load key and nonce */ + K0 = LOAD(k, 8); + K1 = LOAD(k + 8, 8); + N0 = LOAD(npub, 8); + N1 = LOAD(npub + 8, 8); + + /* initialization */ + s.x0 = ASCON_128A_IV; s.x1 = K0; s.x2 = K1; s.x3 = N0; s.x4 = N1; - printstate("initial value:", s); P12(&s); s.x3 ^= K0; s.x4 ^= K1; - printstate("initialization:", s); + printstate("initialization", &s); - // process associated data + /* process associated data */ if (adlen) { - while (adlen >= RATE) { - s.x0 ^= BYTES_TO_U64(ad, 8); - s.x1 ^= 
BYTES_TO_U64(ad + 8, 8); + while (adlen >= ASCON_128A_RATE) { + s.x0 ^= LOAD(ad, 8); + s.x1 ^= LOAD(ad + 8, 8); P8(&s); - adlen -= RATE; - ad += RATE; + ad += ASCON_128A_RATE; + adlen -= ASCON_128A_RATE; } + /* final associated data block */ if (adlen >= 8) { - s.x0 ^= BYTES_TO_U64(ad, 8); - s.x1 ^= BYTES_TO_U64(ad + 8, adlen - 8); - s.x1 ^= 0x80ull << (56 - 8 * (adlen - 8)); + s.x0 ^= LOAD(ad, 8); + s.x1 ^= LOAD(ad + 8, adlen - 8); + s.x1 ^= PAD(adlen - 8); } else { - s.x0 ^= BYTES_TO_U64(ad, adlen); - s.x0 ^= 0x80ull << (56 - 8 * adlen); + s.x0 ^= LOAD(ad, adlen); + s.x0 ^= PAD(adlen); } P8(&s); } s.x4 ^= 1; - printstate("process associated data:", s); + printstate("process associated data", &s); - // process plaintext + /* process ciphertext */ clen -= CRYPTO_ABYTES; - while (clen >= RATE) { - c0 = BYTES_TO_U64(c, 8); - c1 = BYTES_TO_U64(c + 8, 8); - U64_TO_BYTES(m, s.x0 ^ c0, 8); - U64_TO_BYTES(m + 8, s.x1 ^ c1, 8); + while (clen >= ASCON_128A_RATE) { + uint64_t c0 = LOAD(c, 8); + uint64_t c1 = LOAD(c + 8, 8); + STORE(m, s.x0 ^ c0, 8); + STORE(m + 8, s.x1 ^ c1, 8); s.x0 = c0; s.x1 = c1; P8(&s); - clen -= RATE; - m += RATE; - c += RATE; + m += ASCON_128A_RATE; + c += ASCON_128A_RATE; + clen -= ASCON_128A_RATE; } + /* final ciphertext block */ if (clen >= 8) { - c0 = BYTES_TO_U64(c, 8); - c1 = BYTES_TO_U64(c + 8, clen - 8); - U64_TO_BYTES(m, s.x0 ^ c0, 8); - U64_TO_BYTES(m + 8, s.x1 ^ c1, clen - 8); + uint64_t c0 = LOAD(c, 8); + uint64_t c1 = LOAD(c + 8, clen - 8); + STORE(m, s.x0 ^ c0, 8); + STORE(m + 8, s.x1 ^ c1, clen - 8); s.x0 = c0; - s.x1 &= ~BYTE_MASK(clen - 8); + s.x1 &= ~MASK(clen - 8); s.x1 |= c1; - s.x1 ^= 0x80ull << (56 - 8 * (clen - 8)); + s.x1 ^= PAD(clen - 8); } else { - c0 = BYTES_TO_U64(c, clen); - U64_TO_BYTES(m, s.x0 ^ c0, clen); - s.x0 &= ~BYTE_MASK(clen); + uint64_t c0 = LOAD(c, clen); + STORE(m, s.x0 ^ c0, clen); + s.x0 &= ~MASK(clen); s.x0 |= c0; - s.x0 ^= 0x80ull << (56 - 8 * clen); + s.x0 ^= PAD(clen); } c += clen; - printstate("process 
plaintext:", s); + printstate("process ciphertext", &s); - // finalization + /* finalization */ s.x2 ^= K0; s.x3 ^= K1; P12(&s); s.x3 ^= K0; s.x4 ^= K1; - printstate("finalization:", s); + printstate("finalization", &s); - // verify tag (should be constant time, check compiler output) - if (((s.x3 ^ BYTES_TO_U64(c, 8)) | (s.x4 ^ BYTES_TO_U64(c + 8, 8))) != 0) { + /* verify tag (should be constant time, check compiler output) */ + if ((s.x3 ^ LOAD(c, 8)) | (s.x4 ^ LOAD(c + 8, 8))) { *mlen = 0; return -1; } return 0; } - diff --git a/ascon/Implementations/crypto_aead/ascon128av12/ref/encrypt.c b/ascon/Implementations/crypto_aead/ascon128av12/ref/encrypt.c index 21029b3..63abd34 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/ref/encrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128av12/ref/encrypt.c @@ -1,99 +1,97 @@ #include "api.h" +#include "ascon.h" +#include "loadstore.h" #include "permutations.h" +#include "printstate.h" -#define RATE (128 / 8) -#define PA_ROUNDS 12 -#define PB_ROUNDS 8 -#define IV \ - ((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \ - (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32) - -int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, - const unsigned char* m, unsigned long long mlen, - const unsigned char* ad, unsigned long long adlen, - const unsigned char* nsec, const unsigned char* npub, - const unsigned char* k) { - const u64 K0 = BYTES_TO_U64(k, 8); - const u64 K1 = BYTES_TO_U64(k + 8, 8); - const u64 N0 = BYTES_TO_U64(npub, 8); - const u64 N1 = BYTES_TO_U64(npub + 8, 8); - state s; +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + uint64_t K0, K1, N0, N1; + state_t s; (void)nsec; - // set ciphertext size + /* set ciphertext size */ *clen = mlen + CRYPTO_ABYTES; - // initialization - s.x0 = IV; + /* load key and nonce */ + K0 = LOAD(k, 8); + K1 = 
LOAD(k + 8, 8); + N0 = LOAD(npub, 8); + N1 = LOAD(npub + 8, 8); + + /* initialization */ + s.x0 = ASCON_128A_IV; s.x1 = K0; s.x2 = K1; s.x3 = N0; s.x4 = N1; - printstate("initial value:", s); P12(&s); s.x3 ^= K0; s.x4 ^= K1; - printstate("initialization:", s); + printstate("initialization", &s); - // process associated data + /* process associated data */ if (adlen) { - while (adlen >= RATE) { - s.x0 ^= BYTES_TO_U64(ad, 8); - s.x1 ^= BYTES_TO_U64(ad + 8, 8); + while (adlen >= ASCON_128A_RATE) { + s.x0 ^= LOAD(ad, 8); + s.x1 ^= LOAD(ad + 8, 8); P8(&s); - adlen -= RATE; - ad += RATE; + ad += ASCON_128A_RATE; + adlen -= ASCON_128A_RATE; } + /* final associated data block */ if (adlen >= 8) { - s.x0 ^= BYTES_TO_U64(ad, 8); - s.x1 ^= BYTES_TO_U64(ad + 8, adlen - 8); - s.x1 ^= 0x80ull << (56 - 8 * (adlen - 8)); + s.x0 ^= LOAD(ad, 8); + s.x1 ^= LOAD(ad + 8, adlen - 8); + s.x1 ^= PAD(adlen - 8); } else { - s.x0 ^= BYTES_TO_U64(ad, adlen); - s.x0 ^= 0x80ull << (56 - 8 * adlen); + s.x0 ^= LOAD(ad, adlen); + s.x0 ^= PAD(adlen); } P8(&s); } s.x4 ^= 1; - printstate("process associated data:", s); + printstate("process associated data", &s); - // process plaintext - while (mlen >= RATE) { - s.x0 ^= BYTES_TO_U64(m, 8); - s.x1 ^= BYTES_TO_U64(m + 8, 8); - U64_TO_BYTES(c, s.x0, 8); - U64_TO_BYTES(c + 8, s.x1, 8); + /* process plaintext */ + while (mlen >= ASCON_128A_RATE) { + s.x0 ^= LOAD(m, 8); + s.x1 ^= LOAD(m + 8, 8); + STORE(c, s.x0, 8); + STORE(c + 8, s.x1, 8); P8(&s); - mlen -= RATE; - m += RATE; - c += RATE; + m += ASCON_128A_RATE; + c += ASCON_128A_RATE; + mlen -= ASCON_128A_RATE; } + /* final plaintext block */ if (mlen >= 8) { - s.x0 ^= BYTES_TO_U64(m, 8); - s.x1 ^= BYTES_TO_U64(m + 8, mlen - 8); - s.x1 ^= 0x80ull << (56 - 8 * (mlen - 8)); - U64_TO_BYTES(c, s.x0, 8); - U64_TO_BYTES(c + 8, s.x1, mlen - 8); + s.x0 ^= LOAD(m, 8); + s.x1 ^= LOAD(m + 8, mlen - 8); + STORE(c, s.x0, 8); + STORE(c + 8, s.x1, mlen - 8); + s.x1 ^= PAD(mlen - 8); } else { - s.x0 ^= BYTES_TO_U64(m, 
mlen); - s.x0 ^= 0x80ull << (56 - 8 * mlen); - U64_TO_BYTES(c, s.x0, mlen); + s.x0 ^= LOAD(m, mlen); + STORE(c, s.x0, mlen); + s.x0 ^= PAD(mlen); } c += mlen; - printstate("process plaintext:", s); + printstate("process plaintext", &s); - // finalization + /* finalization */ s.x2 ^= K0; s.x3 ^= K1; P12(&s); s.x3 ^= K0; s.x4 ^= K1; - printstate("finalization:", s); + printstate("finalization", &s); - // set tag - U64_TO_BYTES(c, s.x3, 8); - U64_TO_BYTES(c + 8, s.x4, 8); + /* set tag */ + STORE(c, s.x3, 8); + STORE(c + 8, s.x4, 8); return 0; } - diff --git a/ascon/Implementations/crypto_aead/ascon128av12/ref/loadstore.h b/ascon/Implementations/crypto_aead/ascon128av12/ref/loadstore.h new file mode 100644 index 0000000..eccf0e3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/ref/loadstore.h @@ -0,0 +1,31 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +/* set padding byte in Ascon 64-bit word */ +#define PAD(i) SETBYTE(0x80, i) + +static inline uint64_t MASK(int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) x |= SETBYTE(0xff, i); + return x; +} + +static inline uint64_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) x |= SETBYTE(bytes[i], i); + return x; +} + +static inline void STORE(uint8_t* bytes, uint64_t x, int n) { + for (int i = 0; i < n; ++i) bytes[i] = GETBYTE(x, i); +} + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/ref/permutations.h b/ascon/Implementations/crypto_aead/ascon128av12/ref/permutations.h index 3317a9f..ef6dd68 100644 --- a/ascon/Implementations/crypto_aead/ascon128av12/ref/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128av12/ref/permutations.h @@ -1,132 +1,88 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ 
-#ifdef DEBUG -#include -#endif +#include -typedef unsigned char u8; -typedef unsigned long long u64; +#include "ascon.h" +#include "printstate.h" +#include "round.h" -typedef struct { - u64 x0, x1, x2, x3, x4; -} state; +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 -static inline void printstate(const char* text, const state s) { -#ifdef DEBUG - printf("%s\n", text); - printf(" x0=%016llx\n", s.x0); - printf(" x1=%016llx\n", s.x1); - printf(" x2=%016llx\n", s.x2); - printf(" x3=%016llx\n", s.x3); - printf(" x4=%016llx\n", s.x4); -#else - // disable warning about unused parameters - (void)text; - (void)s; -#endif -} +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 -static inline u64 BYTES_TO_U64(const u8* bytes, int n) { - int i; - u64 x = 0; - for (i = 0; i < n; i++) x |= ((u64)bytes[i]) << (56 - 8 * i); - return x; -} +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 -static inline void U64_TO_BYTES(u8* bytes, const u64 x, int n) { - int i; - for (i = 0; i < n; i++) bytes[i] = (u8)(x >> (56 - 8 * i)); -} +#define ASCON_HASH_BYTES 32 -static inline u64 BYTE_MASK(int n) { - int i; - u64 x = 0; - for (i = 0; i < n; i++) x |= 0xffull << (56 - 8 * i); - return x; -} +#define ASCON_128_IV \ + (((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) -static inline u64 ROTR64(u64 x, int n) { return (x << (64 - n)) | (x >> n); } +#define ASCON_128A_IV \ + (((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) -static inline void ROUND(u8 C, state* p) { - state s = *p; - state t; - // addition of round constant - s.x2 ^= C; - printstate(" addition of round constant:", s); - // substitution layer - s.x0 ^= s.x4; - s.x4 ^= s.x3; 
- s.x2 ^= s.x1; - // start of keccak s-box - t.x0 = ~s.x0; - t.x1 = ~s.x1; - t.x2 = ~s.x2; - t.x3 = ~s.x3; - t.x4 = ~s.x4; - t.x0 &= s.x1; - t.x1 &= s.x2; - t.x2 &= s.x3; - t.x3 &= s.x4; - t.x4 &= s.x0; - s.x0 ^= t.x1; - s.x1 ^= t.x2; - s.x2 ^= t.x3; - s.x3 ^= t.x4; - s.x4 ^= t.x0; - // end of keccak s-box - s.x1 ^= s.x0; - s.x0 ^= s.x4; - s.x3 ^= s.x2; - s.x2 = ~s.x2; - printstate(" substitution layer:", s); - // linear diffusion layer - s.x0 ^= ROTR64(s.x0, 19) ^ ROTR64(s.x0, 28); - s.x1 ^= ROTR64(s.x1, 61) ^ ROTR64(s.x1, 39); - s.x2 ^= ROTR64(s.x2, 1) ^ ROTR64(s.x2, 6); - s.x3 ^= ROTR64(s.x3, 10) ^ ROTR64(s.x3, 17); - s.x4 ^= ROTR64(s.x4, 7) ^ ROTR64(s.x4, 41); - printstate(" linear diffusion layer:", s); - *p = s; -} +#define ASCON_80PQ_IV \ + (((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) -static inline void P12(state* s) { - printstate(" permutation input:", *s); - ROUND(0xf0, s); - ROUND(0xe1, s); - ROUND(0xd2, s); - ROUND(0xc3, s); - ROUND(0xb4, s); - ROUND(0xa5, s); - ROUND(0x96, s); - ROUND(0x87, s); - ROUND(0x78, s); - ROUND(0x69, s); - ROUND(0x5a, s); - ROUND(0x4b, s); -} +#define ASCON_HASH_IV \ + (((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) -static inline void P8(state* s) { - printstate(" permutation input:", *s); - ROUND(0xb4, s); - ROUND(0xa5, s); - ROUND(0x96, s); - ROUND(0x87, s); - ROUND(0x78, s); - ROUND(0x69, s); - ROUND(0x5a, s); - ROUND(0x4b, s); +#define ASCON_XOF_IV \ + (((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +static inline void P12(state_t* s) { + printstate(" permutation input", s); + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 
0x5a); + ROUND(s, 0x4b); } -static inline void P6(state* s) { - printstate(" permutation input:", *s); - ROUND(0x96, s); - ROUND(0x87, s); - ROUND(0x78, s); - ROUND(0x69, s); - ROUND(0x5a, s); - ROUND(0x4b, s); +static inline void P8(state_t* s) { + printstate(" permutation input", s); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); } -#endif // PERMUTATIONS_H_ +static inline void P6(state_t* s) { + printstate(" permutation input", s); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/ref/printstate.h b/ascon/Implementations/crypto_aead/ascon128av12/ref/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/ref/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128av12/ref/round.h b/ascon/Implementations/crypto_aead/ascon128av12/ref/round.h new file mode 100644 index 0000000..dcb6c81 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128av12/ref/round.h @@ -0,0 +1,40 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +static inline uint64_t ROR64(uint64_t x, int n) { + return (x << (64 - n)) | (x >> n); +} + +static 
inline void ROUND(state_t* s, uint8_t C) { + state_t t; + /* addition of round constant */ + s->x2 ^= C; + /* substitution layer */ + s->x0 ^= s->x4; + s->x4 ^= s->x3; + s->x2 ^= s->x1; + /* start of keccak s-box */ + t.x0 = s->x0 ^ (~s->x1 & s->x2); + t.x1 = s->x1 ^ (~s->x2 & s->x3); + t.x2 = s->x2 ^ (~s->x3 & s->x4); + t.x3 = s->x3 ^ (~s->x4 & s->x0); + t.x4 = s->x4 ^ (~s->x0 & s->x1); + /* end of keccak s-box */ + t.x1 ^= t.x0; + t.x0 ^= t.x4; + t.x3 ^= t.x2; + t.x2 = ~t.x2; + /* printstate(" substitution layer", &t); */ + /* linear diffusion layer */ + s->x0 = t.x0 ^ ROR64(t.x0, 19) ^ ROR64(t.x0, 28); + s->x1 = t.x1 ^ ROR64(t.x1, 61) ^ ROR64(t.x1, 39); + s->x2 = t.x2 ^ ROR64(t.x2, 1) ^ ROR64(t.x2, 6); + s->x3 = t.x3 ^ ROR64(t.x3, 10) ^ ROR64(t.x3, 17); + s->x4 = t.x4 ^ ROR64(t.x4, 7) ^ ROR64(t.x4, 41); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi16/api.h b/ascon/Implementations/crypto_aead/ascon128v12/bi16/api.h index a4aa567..96a7c47 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi16/api.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi16/api.h @@ -3,3 +3,4 @@ #define CRYPTO_NPUBBYTES 16 #define CRYPTO_ABYTES 16 #define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi16/ascon.c b/ascon/Implementations/crypto_aead/ascon128v12/bi16/ascon.c index 296a10d..983aa7a 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi16/ascon.c +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi16/ascon.c @@ -1,19 +1,13 @@ +#include + #include "api.h" #include "endian.h" -typedef unsigned char u8; -typedef unsigned short u16; -typedef unsigned int u32; -typedef unsigned long long u64; - -#define RATE (64 / 8) #define PA_ROUNDS 12 #define PB_ROUNDS 6 -#define EXT_BYTE16(x, n) ((u8)((u16)(x) >> (8 * (1 - (n))))) -#define INS_BYTE16(x, n) ((u16)(x) << (8 * (1 - (n)))) -#define ROTR16(x, n) (((x) >> (n)) | ((x) << (16 - 
(n)))) -#define ROTL16(x, n) (((x) << (n)) | ((x) >> (16 - (n)))) +#define ROR16(x, n) (((x) >> (n)) | ((x) << (16 - (n)))) +#define ROL16(x, n) (((x) << (n)) | ((x) >> (16 - (n)))) #define COMPRESS_LONG_16(x) \ do { \ @@ -35,24 +29,24 @@ typedef unsigned long long u64; COMPRESS_LONG_16(var_3); \ } while (0) -#define COMPRESS_BYTE_ARRAY_16(a, var_3, var_2, var_1, var_0) \ - do { \ - COMPRESS_U16(U16BIG(((u16 *)(a))[3]), var_3, var_2, var_1, var_0); \ - COMPRESS_U16(U16BIG(((u16 *)(a))[2]), t1_3, t1_2, t1_1, t1_0); \ - var_0 |= t1_0 << 4; \ - var_1 |= t1_1 << 4; \ - var_2 |= t1_2 << 4; \ - var_3 |= t1_3 << 4; \ - COMPRESS_U16(U16BIG(((u16 *)(a))[1]), t1_3, t1_2, t1_1, t1_0); \ - var_0 |= t1_0 << 8; \ - var_1 |= t1_1 << 8; \ - var_2 |= t1_2 << 8; \ - var_3 |= t1_3 << 8; \ - COMPRESS_U16(U16BIG(((u16 *)(a))[0]), t1_3, t1_2, t1_1, t1_0); \ - var_0 |= t1_0 << 12; \ - var_1 |= t1_1 << 12; \ - var_2 |= t1_2 << 12; \ - var_3 |= t1_3 << 12; \ +#define COMPRESS_BYTE_ARRAY_16(a, var_3, var_2, var_1, var_0) \ + do { \ + COMPRESS_U16(U16BIG(((uint16_t*)(a))[3]), var_3, var_2, var_1, var_0); \ + COMPRESS_U16(U16BIG(((uint16_t*)(a))[2]), t1_3, t1_2, t1_1, t1_0); \ + var_0 |= t1_0 << 4; \ + var_1 |= t1_1 << 4; \ + var_2 |= t1_2 << 4; \ + var_3 |= t1_3 << 4; \ + COMPRESS_U16(U16BIG(((uint16_t*)(a))[1]), t1_3, t1_2, t1_1, t1_0); \ + var_0 |= t1_0 << 8; \ + var_1 |= t1_1 << 8; \ + var_2 |= t1_2 << 8; \ + var_3 |= t1_3 << 8; \ + COMPRESS_U16(U16BIG(((uint16_t*)(a))[0]), t1_3, t1_2, t1_1, t1_0); \ + var_0 |= t1_0 << 12; \ + var_1 |= t1_1 << 12; \ + var_2 |= t1_2 << 12; \ + var_3 |= t1_3 << 12; \ } while (0) #define EXPAND_SHORT_16(x) \ @@ -76,28 +70,28 @@ typedef unsigned long long u64; *var = U16BIG(t0_0 | (t0_1 << 1) | (t0_2 << 2) | (t0_3 << 3)); \ } while (0) -#define EXPAND_BYTE_ARRAY_16(a, var_3, var_2, var_1, var_0) \ - do { \ - EXPAND_U16((((u16 *)(a)) + 3), var_3, var_2, var_1, var_0); \ - t1_3 = var_3 >> 4; \ - t1_2 = var_2 >> 4; \ - t1_1 = var_1 >> 4; \ - t1_0 = var_0 >> 4; \ 
- EXPAND_U16((((u16 *)(a)) + 2), t1_3, t1_2, t1_1, t1_0); \ - t1_3 >>= 4; \ - t1_2 >>= 4; \ - t1_1 >>= 4; \ - t1_0 >>= 4; \ - EXPAND_U16((((u16 *)(a)) + 1), t1_3, t1_2, t1_1, t1_0); \ - t1_3 >>= 4; \ - t1_2 >>= 4; \ - t1_1 >>= 4; \ - t1_0 >>= 4; \ - EXPAND_U16((((u16 *)(a)) + 0), t1_3, t1_2, t1_1, t1_0); \ +#define EXPAND_BYTE_ARRAY_16(a, var_3, var_2, var_1, var_0) \ + do { \ + EXPAND_U16((((uint16_t*)(a)) + 3), var_3, var_2, var_1, var_0); \ + t1_3 = var_3 >> 4; \ + t1_2 = var_2 >> 4; \ + t1_1 = var_1 >> 4; \ + t1_0 = var_0 >> 4; \ + EXPAND_U16((((uint16_t*)(a)) + 2), t1_3, t1_2, t1_1, t1_0); \ + t1_3 >>= 4; \ + t1_2 >>= 4; \ + t1_1 >>= 4; \ + t1_0 >>= 4; \ + EXPAND_U16((((uint16_t*)(a)) + 1), t1_3, t1_2, t1_1, t1_0); \ + t1_3 >>= 4; \ + t1_2 >>= 4; \ + t1_1 >>= 4; \ + t1_0 >>= 4; \ + EXPAND_U16((((uint16_t*)(a)) + 0), t1_3, t1_2, t1_1, t1_0); \ } while (0) -// This way of implementing Ascon's S-box was inpired by personal communication -// with Joan Daemen about implementing the 3-bit chi layer. +/* This way of implementing Ascon's S-box was inpired by personal communication + with Joan Daemen about implementing the 3-bit chi layer. 
*/ #define ROUND_16(C_3, C_2, C_1, C_0) \ do { \ /* round constant */ \ @@ -179,62 +173,62 @@ typedef unsigned long long u64; t0_1 = x0_1; \ t0_2 = x0_2; \ t0_3 = x0_3; \ - x0_1 ^= ROTR16(t0_0, 5); \ - x0_2 ^= ROTR16(t0_1, 5); \ - x0_3 ^= ROTR16(t0_2, 5); \ - x0_0 ^= ROTR16(t0_3, 4); \ - x0_0 ^= ROTR16(t0_0, 7); \ - x0_1 ^= ROTR16(t0_1, 7); \ - x0_2 ^= ROTR16(t0_2, 7); \ - x0_3 ^= ROTR16(t0_3, 7); \ + x0_1 ^= ROR16(t0_0, 5); \ + x0_2 ^= ROR16(t0_1, 5); \ + x0_3 ^= ROR16(t0_2, 5); \ + x0_0 ^= ROR16(t0_3, 4); \ + x0_0 ^= ROR16(t0_0, 7); \ + x0_1 ^= ROR16(t0_1, 7); \ + x0_2 ^= ROR16(t0_2, 7); \ + x0_3 ^= ROR16(t0_3, 7); \ t0_0 = x1_0; \ t0_1 = x1_1; \ t0_2 = x1_2; \ t0_3 = x1_3; \ x1_3 ^= t0_0; \ - x1_0 ^= ROTL16(t0_1, 1); \ - x1_1 ^= ROTL16(t0_2, 1); \ - x1_2 ^= ROTL16(t0_3, 1); \ - x1_1 ^= ROTL16(t0_0, 6); \ - x1_2 ^= ROTL16(t0_1, 6); \ - x1_3 ^= ROTL16(t0_2, 6); \ - x1_0 ^= ROTL16(t0_3, 7); \ + x1_0 ^= ROL16(t0_1, 1); \ + x1_1 ^= ROL16(t0_2, 1); \ + x1_2 ^= ROL16(t0_3, 1); \ + x1_1 ^= ROL16(t0_0, 6); \ + x1_2 ^= ROL16(t0_1, 6); \ + x1_3 ^= ROL16(t0_2, 6); \ + x1_0 ^= ROL16(t0_3, 7); \ t0_0 = x2_0; \ t0_1 = x2_1; \ t0_2 = x2_2; \ t0_3 = x2_3; \ - x2_3 ^= ROTR16(t0_0, 1); \ + x2_3 ^= ROR16(t0_0, 1); \ x2_0 ^= t0_1; \ x2_1 ^= t0_2; \ x2_2 ^= t0_3; \ - x2_2 ^= ROTR16(t0_0, 2); \ - x2_3 ^= ROTR16(t0_1, 2); \ - x2_0 ^= ROTR16(t0_2, 1); \ - x2_1 ^= ROTR16(t0_3, 1); \ + x2_2 ^= ROR16(t0_0, 2); \ + x2_3 ^= ROR16(t0_1, 2); \ + x2_0 ^= ROR16(t0_2, 1); \ + x2_1 ^= ROR16(t0_3, 1); \ t0_0 = x3_0; \ t0_1 = x3_1; \ t0_2 = x3_2; \ t0_3 = x3_3; \ - x3_2 ^= ROTR16(t0_0, 3); \ - x3_3 ^= ROTR16(t0_1, 3); \ - x3_0 ^= ROTR16(t0_2, 2); \ - x3_1 ^= ROTR16(t0_3, 2); \ - x3_3 ^= ROTR16(t0_0, 5); \ - x3_0 ^= ROTR16(t0_1, 4); \ - x3_1 ^= ROTR16(t0_2, 4); \ - x3_2 ^= ROTR16(t0_3, 4); \ + x3_2 ^= ROR16(t0_0, 3); \ + x3_3 ^= ROR16(t0_1, 3); \ + x3_0 ^= ROR16(t0_2, 2); \ + x3_1 ^= ROR16(t0_3, 2); \ + x3_3 ^= ROR16(t0_0, 5); \ + x3_0 ^= ROR16(t0_1, 4); \ + x3_1 ^= ROR16(t0_2, 4); \ + x3_2 ^= 
ROR16(t0_3, 4); \ t0_0 = x4_0; \ t0_1 = x4_1; \ t0_2 = x4_2; \ t0_3 = x4_3; \ - x4_1 ^= ROTR16(t0_0, 2); \ - x4_2 ^= ROTR16(t0_1, 2); \ - x4_3 ^= ROTR16(t0_2, 2); \ - x4_0 ^= ROTR16(t0_3, 1); \ - x4_3 ^= ROTL16(t0_0, 5); \ - x4_0 ^= ROTL16(t0_1, 6); \ - x4_1 ^= ROTL16(t0_2, 6); \ - x4_2 ^= ROTL16(t0_3, 6); \ + x4_1 ^= ROR16(t0_0, 2); \ + x4_2 ^= ROR16(t0_1, 2); \ + x4_3 ^= ROR16(t0_2, 2); \ + x4_0 ^= ROR16(t0_3, 1); \ + x4_3 ^= ROL16(t0_0, 5); \ + x4_0 ^= ROL16(t0_1, 6); \ + x4_1 ^= ROL16(t0_2, 6); \ + x4_2 ^= ROL16(t0_3, 6); \ } while (0) #define P12_16 \ @@ -263,45 +257,44 @@ typedef unsigned long long u64; ROUND_16(1, 2, 1, 1); \ } while (0) -int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, const unsigned char *npub, - const unsigned char *k) { - u64 rlen; - u64 i; - - u8 buffer[8]; - - u16 K0_0; - u16 K1_0; - u16 N0_0; - u16 N1_0; - u16 x0_0, x1_0, x2_0, x3_0, x4_0; - u16 t0_0, t1_0; - - u16 K0_1; - u16 K1_1; - u16 N0_1; - u16 N1_1; - u16 x0_1, x1_1, x2_1, x3_1, x4_1; - u16 t0_1, t1_1; - - u16 K0_2; - u16 K1_2; - u16 N0_2; - u16 N1_2; - u16 x0_2, x1_2, x2_2, x3_2, x4_2; - u16 t0_2, t1_2; - - u16 K0_3; - u16 K1_3; - u16 N0_3; - u16 N1_3; - u16 x0_3, x1_3, x2_3, x3_3, x4_3; - u16 t0_3, t1_3; - - u16 in_0, in_1, in_2, in_3; +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + uint64_t rlen; + uint64_t i; + + uint8_t buffer[8]; + + uint16_t K0_0; + uint16_t K1_0; + uint16_t N0_0; + uint16_t N1_0; + uint16_t x0_0, x1_0, x2_0, x3_0, x4_0; + uint16_t t0_0, t1_0; + + uint16_t K0_1; + uint16_t K1_1; + uint16_t N0_1; + uint16_t N1_1; + uint16_t x0_1, x1_1, x2_1, x3_1, x4_1; + uint16_t t0_1, t1_1; + + uint16_t K0_2; + uint16_t K1_2; + uint16_t N0_2; + uint16_t N1_2; + uint16_t x0_2, 
x1_2, x2_2, x3_2, x4_2; + uint16_t t0_2, t1_2; + + uint16_t K0_3; + uint16_t K1_3; + uint16_t N0_3; + uint16_t N1_3; + uint16_t x0_3, x1_3, x2_3, x3_3, x4_3; + uint16_t t0_3, t1_3; + + uint16_t in_0, in_1, in_2, in_3; (void)nsec; @@ -310,8 +303,8 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, COMPRESS_BYTE_ARRAY_16(npub, N0_3, N0_2, N0_1, N0_0); COMPRESS_BYTE_ARRAY_16(npub + 8, N1_3, N1_2, N1_1, N1_0); - // initialization - t1_0 = (u16)((CRYPTO_KEYBYTES * 8) << 8 | (RATE * 8) << 0); + /* initialization */ + t1_0 = (uint16_t)((CRYPTO_KEYBYTES * 8) << 8 | (ASCON_RATE * 8) << 0); t1_1 = t1_0 >> 1; t1_2 = t1_1 >> 1; t1_3 = t1_2 >> 1; @@ -323,7 +316,7 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, x0_1 = t1_1 << 12; x0_2 = t1_2 << 12; x0_3 = t1_3 << 12; - t1_0 = (u16)(PA_ROUNDS << 8 | PB_ROUNDS << 0); + t1_0 = (uint16_t)(PA_ROUNDS << 8 | PB_ROUNDS << 0); t1_1 = t1_0 >> 1; t1_2 = t1_1 >> 1; t1_3 = t1_2 >> 1; @@ -360,18 +353,18 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, x4_1 ^= K1_1; x4_2 ^= K1_2; x4_3 ^= K1_3; - // process associated data + /* process associated data */ if (adlen) { rlen = adlen; - while (rlen >= RATE) { + while (rlen >= ASCON_RATE) { COMPRESS_BYTE_ARRAY_16(ad, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; x0_2 ^= in_2; x0_3 ^= in_3; P6_16; - rlen -= RATE; - ad += RATE; + rlen -= ASCON_RATE; + ad += ASCON_RATE; } for (i = 0; i < rlen; ++i, ++ad) buffer[i] = *ad; buffer[rlen] = 0x80; @@ -385,9 +378,9 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, } x4_0 ^= 1; - // process plaintext + /* process plaintext */ rlen = mlen; - while (rlen >= RATE) { + while (rlen >= ASCON_RATE) { COMPRESS_BYTE_ARRAY_16(m, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; @@ -395,9 +388,9 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, x0_3 ^= in_3; EXPAND_BYTE_ARRAY_16(c, x0_3, x0_2, x0_1, x0_0); P6_16; - rlen -= RATE; - m += RATE; - c += RATE; + 
rlen -= ASCON_RATE; + m += ASCON_RATE; + c += ASCON_RATE; } for (i = 0; i < rlen; ++i, ++m) buffer[i] = *m; buffer[rlen] = 0x80; @@ -410,7 +403,7 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, EXPAND_BYTE_ARRAY_16(buffer, x0_3, x0_2, x0_1, x0_0); for (i = 0; i < rlen; ++i, ++c) *c = buffer[i]; - // finalization + /* finalization */ x1_0 ^= K0_0; x1_1 ^= K0_1; x1_2 ^= K0_2; @@ -429,7 +422,7 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, x4_2 ^= K1_2; x4_3 ^= K1_3; - // return tag + /* return tag */ EXPAND_BYTE_ARRAY_16(c, x3_3, x3_2, x3_1, x3_0); c += 8; EXPAND_BYTE_ARRAY_16(c, x4_3, x4_2, x4_1, x4_0); @@ -438,47 +431,45 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, return 0; } -int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, - unsigned char *nsec, const unsigned char *c, - unsigned long long clen, const unsigned char *ad, - unsigned long long adlen, const unsigned char *npub, - const unsigned char *k) { +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { *mlen = 0; if (clen < CRYPTO_ABYTES) return -1; - u64 rlen; - u64 i; - - u16 ret_val; - u8 buffer[8]; - - u16 K0_0; - u16 K1_0; - u16 N0_0; - u16 N1_0; - u16 x0_0, x1_0, x2_0, x3_0, x4_0; - u16 t0_0, t1_0; - u16 K0_1; - u16 K1_1; - u16 N0_1; - u16 N1_1; - u16 x0_1, x1_1, x2_1, x3_1, x4_1; - u16 t0_1, t1_1; - - u16 K0_2; - u16 K1_2; - u16 N0_2; - u16 N1_2; - u16 x0_2, x1_2, x2_2, x3_2, x4_2; - u16 t0_2, t1_2; - - u16 K0_3; - u16 K1_3; - u16 N0_3; - u16 N1_3; - u16 x0_3, x1_3, x2_3, x3_3, x4_3; - u16 t0_3, t1_3; - u16 in_0, in_1, in_2, in_3; + uint64_t rlen; + uint64_t i; + + uint16_t ret_val; + uint8_t buffer[8]; + + uint16_t K0_0; + uint16_t K1_0; + uint16_t N0_0; + uint16_t N1_0; + uint16_t x0_0, x1_0, x2_0, x3_0, x4_0; + uint16_t t0_0, t1_0; + uint16_t K0_1; + uint16_t K1_1; + uint16_t N0_1; + 
uint16_t N1_1; + uint16_t x0_1, x1_1, x2_1, x3_1, x4_1; + uint16_t t0_1, t1_1; + + uint16_t K0_2; + uint16_t K1_2; + uint16_t N0_2; + uint16_t N1_2; + uint16_t x0_2, x1_2, x2_2, x3_2, x4_2; + uint16_t t0_2, t1_2; + + uint16_t K0_3; + uint16_t K1_3; + uint16_t N0_3; + uint16_t N1_3; + uint16_t x0_3, x1_3, x2_3, x3_3, x4_3; + uint16_t t0_3, t1_3; + uint16_t in_0, in_1, in_2, in_3; (void)nsec; @@ -487,8 +478,8 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, COMPRESS_BYTE_ARRAY_16(npub, N0_3, N0_2, N0_1, N0_0); COMPRESS_BYTE_ARRAY_16(npub + 8, N1_3, N1_2, N1_1, N1_0); - // initialization - t1_0 = (u16)((CRYPTO_KEYBYTES * 8) << 8 | (RATE * 8) << 0); + /* initialization */ + t1_0 = (uint16_t)((CRYPTO_KEYBYTES * 8) << 8 | (ASCON_RATE * 8) << 0); t1_1 = t1_0 >> 1; t1_2 = t1_1 >> 1; t1_3 = t1_2 >> 1; @@ -500,7 +491,7 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, x0_1 = t1_1 << 12; x0_2 = t1_2 << 12; x0_3 = t1_3 << 12; - t1_0 = (u16)(PA_ROUNDS << 8 | PB_ROUNDS << 0); + t1_0 = (uint16_t)(PA_ROUNDS << 8 | PB_ROUNDS << 0); t1_1 = t1_0 >> 1; t1_2 = t1_1 >> 1; t1_3 = t1_2 >> 1; @@ -537,18 +528,18 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, x4_1 ^= K1_1; x4_2 ^= K1_2; x4_3 ^= K1_3; - // process associated data + /* process associated data */ if (adlen) { rlen = adlen; - while (rlen >= RATE) { + while (rlen >= ASCON_RATE) { COMPRESS_BYTE_ARRAY_16(ad, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; x0_2 ^= in_2; x0_3 ^= in_3; P6_16; - rlen -= RATE; - ad += RATE; + rlen -= ASCON_RATE; + ad += ASCON_RATE; } for (i = 0; i < rlen; ++i, ++ad) buffer[i] = *ad; buffer[rlen] = 0x80; @@ -562,22 +553,22 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, } x4_0 ^= 1; - // process plaintext + /* process plaintext */ rlen = clen - CRYPTO_KEYBYTES; - while (rlen >= RATE) { + while (rlen >= ASCON_RATE) { EXPAND_U16(&t1_0, x0_3, x0_2, x0_1, x0_0); EXPAND_U16(&t1_1, x0_3 >> 4, x0_2 >> 4, x0_1 >> 4, 
x0_0 >> 4); EXPAND_U16(&t1_2, x0_3 >> 8, x0_2 >> 8, x0_1 >> 8, x0_0 >> 8); EXPAND_U16(&t1_3, x0_3 >> 12, x0_2 >> 12, x0_1 >> 12, x0_0 >> 12); - ((u16 *)m)[0] = (t1_3) ^ ((u16 *)c)[0]; - ((u16 *)m)[1] = (t1_2) ^ ((u16 *)c)[1]; - ((u16 *)m)[2] = (t1_1) ^ ((u16 *)c)[2]; - ((u16 *)m)[3] = (t1_0) ^ ((u16 *)c)[3]; + ((uint16_t*)m)[0] = (t1_3) ^ ((uint16_t*)c)[0]; + ((uint16_t*)m)[1] = (t1_2) ^ ((uint16_t*)c)[1]; + ((uint16_t*)m)[2] = (t1_1) ^ ((uint16_t*)c)[2]; + ((uint16_t*)m)[3] = (t1_0) ^ ((uint16_t*)c)[3]; COMPRESS_BYTE_ARRAY_16(c, x0_3, x0_2, x0_1, x0_0); P6_16; - rlen -= RATE; - m += RATE; - c += RATE; + rlen -= ASCON_RATE; + m += ASCON_RATE; + c += ASCON_RATE; } EXPAND_BYTE_ARRAY_16(buffer, x0_3, x0_2, x0_1, x0_0); for (i = 0; i < rlen; ++i, ++m, ++c) { @@ -588,7 +579,7 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, COMPRESS_BYTE_ARRAY_16(buffer, x0_3, x0_2, x0_1, x0_0); - // finalization + /* finalization */ x1_0 ^= K0_0; x1_1 ^= K0_1; x1_2 ^= K0_2; @@ -607,32 +598,31 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, x4_2 ^= K1_2; x4_3 ^= K1_3; - // return -1 if verification fails + /* return -1 if verification fails */ ret_val = 0; EXPAND_U16(&t1_0, x3_3, x3_2, x3_1, x3_0); EXPAND_U16(&t1_1, x3_3 >> 4, x3_2 >> 4, x3_1 >> 4, x3_0 >> 4); EXPAND_U16(&t1_2, x3_3 >> 8, x3_2 >> 8, x3_1 >> 8, x3_0 >> 8); EXPAND_U16(&t1_3, x3_3 >> 12, x3_2 >> 12, x3_1 >> 12, x3_0 >> 12); - ret_val |= ((u16 *)c)[0] ^ (t1_3); - ret_val |= ((u16 *)c)[1] ^ (t1_2); - ret_val |= ((u16 *)c)[2] ^ (t1_1); - ret_val |= ((u16 *)c)[3] ^ (t1_0); + ret_val |= ((uint16_t*)c)[0] ^ (t1_3); + ret_val |= ((uint16_t*)c)[1] ^ (t1_2); + ret_val |= ((uint16_t*)c)[2] ^ (t1_1); + ret_val |= ((uint16_t*)c)[3] ^ (t1_0); EXPAND_U16(&t1_0, x4_3, x4_2, x4_1, x4_0); EXPAND_U16(&t1_1, x4_3 >> 4, x4_2 >> 4, x4_1 >> 4, x4_0 >> 4); EXPAND_U16(&t1_2, x4_3 >> 8, x4_2 >> 8, x4_1 >> 8, x4_0 >> 8); EXPAND_U16(&t1_3, x4_3 >> 12, x4_2 >> 12, x4_1 >> 12, x4_0 >> 12); - ret_val |= 
((u16 *)c)[4] ^ (t1_3); - ret_val |= ((u16 *)c)[5] ^ (t1_2); - ret_val |= ((u16 *)c)[6] ^ (t1_1); - ret_val |= ((u16 *)c)[7] ^ (t1_0); + ret_val |= ((uint16_t*)c)[4] ^ (t1_3); + ret_val |= ((uint16_t*)c)[5] ^ (t1_2); + ret_val |= ((uint16_t*)c)[6] ^ (t1_1); + ret_val |= ((uint16_t*)c)[7] ^ (t1_0); if (ret_val != 0) return -1; - // return plaintext + /* return plaintext */ *mlen = clen - CRYPTO_ABYTES; return 0; } - diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi16/endian.h b/ascon/Implementations/crypto_aead/ascon128v12/bi16/endian.h index ffbdcd9..3944360 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi16/endian.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi16/endian.h @@ -3,29 +3,37 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) #elif defined(_MSC_VER) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ - (((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ - (((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ - (((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ 
+ ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) #define U32BIG(x) \ - ((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ - (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) -#define U16BIG(x) \ - ((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8)) + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ - +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32/api.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32/api.h index a4aa567..96a7c47 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32/api.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32/api.h @@ -3,3 +3,4 @@ #define CRYPTO_NPUBBYTES 16 #define CRYPTO_ABYTES 16 #define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32/ascon.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32/ascon.c new file mode 100644 index 0000000..9011a77 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32/ascon.c @@ -0,0 +1,250 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + XOR(*K1, LOAD64(k)); + XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t N0, N1; + word_t K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* initialization */ + PINIT(s); + XOR(s->x0, IV); + if 
(CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + XOR(s->x1, K1); + XOR(s->x2, K2); + XOR(s->x3, N0); + XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) XOR(*px, LOAD(ad, adlen)); + XOR(*px, PAD(adlen)); + PB(s); + } + XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* 
final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + XOR(*px, cx); + STORE(m, *px, clen); + AND(*px, XMASK(clen)); + XOR(*px, cx); + } + XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} + +#if !ASCON_INLINE_MODE + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + init(s, npub, k); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { final(s, k); } + +#else + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ 
+ INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, k); + /* set tag */ + c += mlen; + STORE64(c, s.x3); + STORE64(c + 8, s.x4); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, k); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + XOR(s.x3, LOAD64(c)); + XOR(s.x4, LOAD64(c + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32/ascon.h new file mode 100644 index 0000000..10a5b6e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32/config.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32/config.h new file mode 100644 index 0000000..b1b5080 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define 
ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 0 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'M' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32/endian.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32/endian.h index ffbdcd9..3944360 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32/endian.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32/endian.h @@ -3,29 +3,37 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) #elif defined(_MSC_VER) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ - (((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ - (((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ - (((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL 
& (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) #define U32BIG(x) \ - ((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ - (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) -#define U16BIG(x) \ - ((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8)) + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ - +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32/loadstore.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int 
n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* 
LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32/permutations.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32/permutations.c new file mode 100644 index 0000000..1bca2ef --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32/permutations.h index 8b13e99..ef338f1 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32/permutations.h @@ -1,128 +1,189 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -#include "endian.h" - -typedef unsigned char u8; -typedef unsigned int u32; -typedef unsigned long long u64; - -typedef struct { - u32 e; - u32 o; -} u32_2; - -#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) -#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) - -// Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 -#define to_bit_interleaving(out, in) \ - do { \ - u32 hi = (in) >> 32; \ - u32 lo = (u32)(in); \ - u32 r0, r1; \ - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \ - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \ - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \ - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \ - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \ - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \ - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \ - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \ - (out).e = (lo & 0x0000FFFF) | (hi << 16); \ - (out).o = (lo >> 16) | (hi & 0xFFFF0000); \ - } while (0) - -// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -#define from_bit_interleaving(out, in) \ - do { \ - u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16); \ - u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000); \ - u32 r0, r1; \ - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \ - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \ - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \ - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \ - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \ - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \ - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \ - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \ - out = (u64)hi << 32 | lo; \ - } while (0) - -#define ROUND(C_e, C_o) \ - do { \ - /* round constant */ \ - x2.e ^= C_e; x2.o ^= C_o; \ - /* s-box layer */ \ - x0.e ^= x4.e; x0.o ^= x4.o; \ - x4.e ^= x3.e; x4.o ^= x3.o; \ - x2.e ^= x1.e; x2.o ^= x1.o; \ - t0.e = x0.e; t0.o = x0.o; \ - t4.e = x4.e; t4.o = x4.o; \ - t3.e = x3.e; t3.o = x3.o; \ - t1.e = x1.e; t1.o = x1.o; \ - t2.e = x2.e; t2.o = x2.o; \ - x0.e = t0.e ^ (~t1.e & t2.e); x0.o = t0.o ^ (~t1.o & t2.o); \ - x2.e = t2.e ^ (~t3.e & 
t4.e); x2.o = t2.o ^ (~t3.o & t4.o); \ - x4.e = t4.e ^ (~t0.e & t1.e); x4.o = t4.o ^ (~t0.o & t1.o); \ - x1.e = t1.e ^ (~t2.e & t3.e); x1.o = t1.o ^ (~t2.o & t3.o); \ - x3.e = t3.e ^ (~t4.e & t0.e); x3.o = t3.o ^ (~t4.o & t0.o); \ - x1.e ^= x0.e; x1.o ^= x0.o; \ - x3.e ^= x2.e; x3.o ^= x2.o; \ - x0.e ^= x4.e; x0.o ^= x4.o; \ - /* linear layer */ \ - t0.e = x0.e ^ ROTR32(x0.o, 4); t0.o = x0.o ^ ROTR32(x0.e, 5); \ - t1.e = x1.e ^ ROTR32(x1.e, 11); t1.o = x1.o ^ ROTR32(x1.o, 11); \ - t2.e = x2.e ^ ROTR32(x2.o, 2); t2.o = x2.o ^ ROTR32(x2.e, 3); \ - t3.e = x3.e ^ ROTR32(x3.o, 3); t3.o = x3.o ^ ROTR32(x3.e, 4); \ - t4.e = x4.e ^ ROTR32(x4.e, 17); t4.o = x4.o ^ ROTR32(x4.o, 17); \ - x0.e ^= ROTR32(t0.o, 9); x0.o ^= ROTR32(t0.e, 10); \ - x1.e ^= ROTR32(t1.o, 19); x1.o ^= ROTR32(t1.e, 20); \ - x2.e ^= t2.o; x2.o ^= ROTR32(t2.e, 1); \ - x3.e ^= ROTR32(t3.e, 5); x3.o ^= ROTR32(t3.o, 5); \ - x4.e ^= ROTR32(t4.o, 3); x4.o ^= ROTR32(t4.e, 4); \ - x2.e = ~x2.e; x2.o = ~x2.o; \ - } while(0) - -#define P12() \ - do { \ - ROUND(0xc, 0xc); \ - ROUND(0x9, 0xc); \ - ROUND(0xc, 0x9); \ - ROUND(0x9, 0x9); \ - ROUND(0x6, 0xc); \ - ROUND(0x3, 0xc); \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#define P8() \ - do { \ - ROUND(0x6, 0xc); \ - ROUND(0x3, 0xc); \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#define P6() \ - do { \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#endif // PERMUTATIONS_H_ +#include +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 
6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define 
START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) 
P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32/round.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32/round.h new file mode 100644 index 0000000..d8ea3b6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32/round.h @@ -0,0 +1,85 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + state_t t; + /* round constant */ + s->x2.e ^= C_e; + s->x2.o ^= C_o; + /* s-box layer */ + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + s->x4.e ^= s->x3.e; + s->x4.o ^= s->x3.o; + s->x2.e ^= s->x1.e; + s->x2.o ^= s->x1.o; + t.x0.e = s->x0.e; + t.x0.o = s->x0.o; + t.x4.e = s->x4.e; + t.x4.o = s->x4.o; + t.x3.e = s->x3.e; + t.x3.o = 
s->x3.o; + t.x1.e = s->x1.e; + t.x1.o = s->x1.o; + t.x2.e = s->x2.e; + t.x2.o = s->x2.o; + s->x0.e = t.x0.e ^ (~t.x1.e & t.x2.e); + s->x0.o = t.x0.o ^ (~t.x1.o & t.x2.o); + s->x2.e = t.x2.e ^ (~t.x3.e & t.x4.e); + s->x2.o = t.x2.o ^ (~t.x3.o & t.x4.o); + s->x4.e = t.x4.e ^ (~t.x0.e & t.x1.e); + s->x4.o = t.x4.o ^ (~t.x0.o & t.x1.o); + s->x1.e = t.x1.e ^ (~t.x2.e & t.x3.e); + s->x1.o = t.x1.o ^ (~t.x2.o & t.x3.o); + s->x3.e = t.x3.e ^ (~t.x4.e & t.x0.e); + s->x3.o = t.x3.o ^ (~t.x4.o & t.x0.o); + s->x1.e ^= s->x0.e; + s->x1.o ^= s->x0.o; + s->x3.e ^= s->x2.e; + s->x3.o ^= s->x2.o; + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + /* linear layer */ + t.x0.e = s->x0.e ^ ROR32(s->x0.o, 4); + t.x0.o = s->x0.o ^ ROR32(s->x0.e, 5); + t.x1.e = s->x1.e ^ ROR32(s->x1.e, 11); + t.x1.o = s->x1.o ^ ROR32(s->x1.o, 11); + t.x2.e = s->x2.e ^ ROR32(s->x2.o, 2); + t.x2.o = s->x2.o ^ ROR32(s->x2.e, 3); + t.x3.e = s->x3.e ^ ROR32(s->x3.o, 3); + t.x3.o = s->x3.o ^ ROR32(s->x3.e, 4); + t.x4.e = s->x4.e ^ ROR32(s->x4.e, 17); + t.x4.o = s->x4.o ^ ROR32(s->x4.o, 17); + s->x0.e ^= ROR32(t.x0.o, 9); + s->x0.o ^= ROR32(t.x0.e, 10); + s->x1.e ^= ROR32(t.x1.o, 19); + s->x1.o ^= ROR32(t.x1.e, 20); + s->x2.e ^= t.x2.o; + s->x2.o ^= ROR32(t.x2.e, 1); + s->x3.e ^= ROR32(t.x3.e, 5); + s->x3.o ^= ROR32(t.x3.o, 5); + s->x4.e ^= ROR32(t.x4.o, 3); + s->x4.o ^= ROR32(t.x4.e, 4); + s->x2.e = ~s->x2.e; + s->x2.o = ~s->x2.o; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32/word.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32/word.h new file mode 100644 index 0000000..8ffcaaa --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32/word.h @@ -0,0 +1,117 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef struct { + uint32_t e; + uint32_t o; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { + return (word_t){.o = x >> 32, .e = x}; +} + +__forceinline uint64_t UINT64_T(word_t x) { 
return (uint64_t)x.o << 32 | x.e; } + +__forceinline uint64_t TOBI32(uint64_t in); + +__forceinline uint64_t FROMBI32(uint64_t in); + +__forceinline word_t U64TOWORD(uint64_t x) { + uint64_t w = TOBI32(x); + return (word_t){.o = w >> 32, .e = w}; +} + +__forceinline uint64_t WORDTOU64(word_t w) { + return FROMBI32((uint64_t)w.o << 32 | w.e); +} + +#define XOR(a, b) \ + do { \ + word_t tb = b; \ + (a).e ^= tb.e; \ + (a).o ^= tb.o; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t tb = b; \ + (a).e &= tb.e; \ + (a).o &= tb.o; \ + } while (0) + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); +} + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return WORD_T((uint64_t)mask << 32 | mask); +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t TOBI32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t FROMBI32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + return (uint64_t)hi << 32 | lo; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/api.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/api.h new file mode 100644 index 0000000..96a7c47 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 +#define 
ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/ascon.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/ascon.c new file mode 100644 index 0000000..9011a77 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/ascon.c @@ -0,0 +1,250 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + XOR(*K1, LOAD64(k)); + XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t N0, N1; + word_t K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* initialization */ + PINIT(s); + XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + XOR(s->x1, K1); + XOR(s->x2, K2); + XOR(s->x3, N0); + XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) XOR(*px, LOAD(ad, adlen)); + XOR(*px, PAD(adlen)); + PB(s); + } + XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { 
+ XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + XOR(*px, cx); + STORE(m, *px, clen); + AND(*px, XMASK(clen)); + XOR(*px, cx); + } + XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} + +#if !ASCON_INLINE_MODE + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT 
ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + init(s, npub, k); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { final(s, k); } + +#else + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, k); + /* set tag */ + c += mlen; + STORE64(c, s.x3); + STORE64(c + 8, s.x4); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, k); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + XOR(s.x3, LOAD64(c)); + XOR(s.x4, LOAD64(c + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/ascon.h new file mode 100644 index 0000000..d11fa01 --- /dev/null +++ 
b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/ascon.h @@ -0,0 +1,20 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; + word_t rx; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/config.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/config.h new file mode 100644 index 0000000..e1a4d6e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/config.h @@ -0,0 +1,44 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 0 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'B' +#endif + +/* Ascon mask generator { 'S'tdlib, 'X'orshift, 'R'andombytes } */ +#ifndef ASCON_MASK_RNG +#define ASCON_MASK_RNG 'X' +#endif + +/* mask key/data loads */ +#ifndef ASCON_MASK_LOADS +#define ASCON_MASK_LOADS 0 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/endian.h 
b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/implementors b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/loadstore.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ 
b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + 
return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/permutations.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/permutations.c new file mode 100644 index 0000000..1bca2ef --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void 
P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/permutations.h new file mode 100644 index 0000000..ef338f1 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/permutations.h @@ -0,0 +1,189 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define 
ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if 
ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/random.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/random.c new file mode 100644 index 0000000..81d0081 --- /dev/null +++ 
b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/random.c @@ -0,0 +1,62 @@ + +#include "random.h" + +#include +#include + +#include "config.h" + +#if ASCON_MASK_RNG == 'R' + +void randinit() { srand(time(0)); } + +uint32_t rand32() { + uint32_t r; + randombytes(&r, 4); + return r; +} + +uint64_t rand64() { + uint64_t r; + randombytes(&r, 8); + return r; +} + +#elif ASCON_MASK_RNG == 'S' + +void randinit() { srand(time(0)); } + +uint32_t rand32() { return ((uint32_t)rand() << 21) ^ rand(); } + +uint64_t rand64() { + return ((uint64_t)rand() << 43) ^ ((uint64_t)rand() << 21) ^ rand(); +} + +#elif ASCON_MASK_RNG == 'X' + +uint32_t xorshift32; +uint64_t xorshift64; + +void randinit() { + srand(time(0)); + xorshift32 = rand(); + xorshift64 = (uint64_t)rand() << 32 | rand(); +} + +uint32_t rand32() { + uint32_t x = xorshift32; + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + return xorshift32 = x; +} + +uint64_t rand64() { + uint64_t x = xorshift64; + x ^= x << 13; + x ^= x >> 7; + x ^= x << 17; + return xorshift64 = x; +} + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/random.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/random.h new file mode 100644 index 0000000..89226b6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/random.h @@ -0,0 +1,10 @@ +#ifndef RANDOM_H_ +#define RANDOM_H_ + +#include + +void randinit(); +uint32_t rand32(); +uint64_t rand64(); + +#endif /* RANDOM_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/round.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/round.h new file mode 100644 index 0000000..77acbd3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/round.h @@ -0,0 +1,142 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" +#include "random.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = TOSHARES(0); + *K1 = 
TOSHARES(0); + *K2 = TOSHARES(0); +} + +__forceinline void PINIT(state_t* s) { + randinit(); + s->x0 = TOSHARES(0); + s->x1 = TOSHARES(0); + s->x2 = TOSHARES(0); + s->x3 = TOSHARES(0); + s->x4 = TOSHARES(0); + s->rx = TOSHARES(0); +} + +#define TOFFOLI(a0, a1, b0, b1, c0, c1) \ + do { \ + (a0) ^= (~(b0)) & (c1); \ + (a0) ^= (~(b0)) & (c0); \ + (a1) ^= (b1) & (c1); \ + (a1) ^= (b1) & (c0); \ + } while (0) + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + state_t t; + /* refresh randomness */ + /* s->rx = TOSHARES(0); */ + /* addition of round constant */ + s->x2.s0.e ^= C_e; + /* substitution layer */ + s->x0.s0.e ^= s->x4.s0.e; + s->x4.s0.e ^= s->x3.s0.e; + s->x2.s0.e ^= s->x1.s0.e; + s->x0.s1.e ^= s->x4.s1.e; + s->x4.s1.e ^= s->x3.s1.e; + s->x2.s1.e ^= s->x1.s1.e; + /* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */ + s->rx.s0.e = s->rx.s1.e; + TOFFOLI(s->rx.s0.e, s->rx.s1.e, s->x4.s0.e, s->x4.s1.e, s->x0.s0.e, + s->x0.s1.e); + TOFFOLI(s->x0.s0.e, s->x0.s1.e, s->x1.s0.e, s->x1.s1.e, s->x2.s0.e, + s->x2.s1.e); + TOFFOLI(s->x2.s0.e, s->x2.s1.e, s->x3.s0.e, s->x3.s1.e, s->x4.s0.e, + s->x4.s1.e); + TOFFOLI(s->x4.s0.e, s->x4.s1.e, s->x0.s0.e, s->x0.s1.e, s->x1.s0.e, + s->x1.s1.e); + TOFFOLI(s->x1.s0.e, s->x1.s1.e, s->x2.s0.e, s->x2.s1.e, s->x3.s0.e, + s->x3.s1.e); + s->x3.s1.e ^= s->rx.s1.e; + s->x3.s0.e ^= s->rx.s0.e; + /* end of shared keccak s-box */ + s->x1.s0.e ^= s->x0.s0.e; + s->x0.s0.e ^= s->x4.s0.e; + s->x3.s0.e ^= s->x2.s0.e; + s->x2.s0.e = ~s->x2.s0.e; + s->x1.s1.e ^= s->x0.s1.e; + s->x0.s1.e ^= s->x4.s1.e; + s->x3.s1.e ^= s->x2.s1.e; + /* addition of round constant */ + s->x2.s0.o ^= C_o; + /* substitution layer */ + s->x0.s0.o ^= s->x4.s0.o; + s->x4.s0.o ^= s->x3.s0.o; + s->x2.s0.o ^= s->x1.s0.o; + s->x0.s1.o ^= s->x4.s1.o; + s->x4.s1.o ^= s->x3.s1.o; + s->x2.s1.o ^= s->x1.s1.o; + /* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */ + s->rx.s0.o = s->rx.s1.o; + TOFFOLI(s->rx.s0.o, s->rx.s1.o, 
s->x4.s0.o, s->x4.s1.o, s->x0.s0.o, + s->x0.s1.o); + TOFFOLI(s->x0.s0.o, s->x0.s1.o, s->x1.s0.o, s->x1.s1.o, s->x2.s0.o, + s->x2.s1.o); + TOFFOLI(s->x2.s0.o, s->x2.s1.o, s->x3.s0.o, s->x3.s1.o, s->x4.s0.o, + s->x4.s1.o); + TOFFOLI(s->x4.s0.o, s->x4.s1.o, s->x0.s0.o, s->x0.s1.o, s->x1.s0.o, + s->x1.s1.o); + TOFFOLI(s->x1.s0.o, s->x1.s1.o, s->x2.s0.o, s->x2.s1.o, s->x3.s0.o, + s->x3.s1.o); + s->x3.s1.o ^= s->rx.s1.o; + s->x3.s0.o ^= s->rx.s0.o; + /* end of shared keccak s-box */ + s->x1.s0.o ^= s->x0.s0.o; + s->x0.s0.o ^= s->x4.s0.o; + s->x3.s0.o ^= s->x2.s0.o; + s->x2.s0.o = ~s->x2.s0.o; + s->x1.s1.o ^= s->x0.s1.o; + s->x0.s1.o ^= s->x4.s1.o; + s->x3.s1.o ^= s->x2.s1.o; + /* linear diffusion layer */ + t.x0.s1.e = s->x0.s1.e ^ ROR32(s->x0.s1.o, 4); + t.x0.s1.o = s->x0.s1.o ^ ROR32(s->x0.s1.e, 5); + t.x1.s1.e = s->x1.s1.e ^ ROR32(s->x1.s1.e, 11); + t.x1.s1.o = s->x1.s1.o ^ ROR32(s->x1.s1.o, 11); + t.x2.s1.e = s->x2.s1.e ^ ROR32(s->x2.s1.o, 2); + t.x2.s1.o = s->x2.s1.o ^ ROR32(s->x2.s1.e, 3); + t.x3.s1.e = s->x3.s1.e ^ ROR32(s->x3.s1.o, 3); + t.x3.s1.o = s->x3.s1.o ^ ROR32(s->x3.s1.e, 4); + t.x4.s1.e = s->x4.s1.e ^ ROR32(s->x4.s1.e, 17); + t.x4.s1.o = s->x4.s1.o ^ ROR32(s->x4.s1.o, 17); + s->x0.s1.e ^= ROR32(t.x0.s1.o, 9); + s->x0.s1.o ^= ROR32(t.x0.s1.e, 10); + s->x1.s1.e ^= ROR32(t.x1.s1.o, 19); + s->x1.s1.o ^= ROR32(t.x1.s1.e, 20); + s->x2.s1.e ^= t.x2.s1.o; + s->x2.s1.o ^= ROR32(t.x2.s1.e, 1); + s->x3.s1.e ^= ROR32(t.x3.s1.e, 5); + s->x3.s1.o ^= ROR32(t.x3.s1.o, 5); + s->x4.s1.e ^= ROR32(t.x4.s1.o, 3); + s->x4.s1.o ^= ROR32(t.x4.s1.e, 4); + t.x0.s0.e = s->x0.s0.e ^ ROR32(s->x0.s0.o, 4); + t.x0.s0.o = s->x0.s0.o ^ ROR32(s->x0.s0.e, 5); + t.x1.s0.e = s->x1.s0.e ^ ROR32(s->x1.s0.e, 11); + t.x1.s0.o = s->x1.s0.o ^ ROR32(s->x1.s0.o, 11); + t.x2.s0.e = s->x2.s0.e ^ ROR32(s->x2.s0.o, 2); + t.x2.s0.o = s->x2.s0.o ^ ROR32(s->x2.s0.e, 3); + t.x3.s0.e = s->x3.s0.e ^ ROR32(s->x3.s0.o, 3); + t.x3.s0.o = s->x3.s0.o ^ ROR32(s->x3.s0.e, 4); + t.x4.s0.e = s->x4.s0.e ^ 
ROR32(s->x4.s0.e, 17); + t.x4.s0.o = s->x4.s0.o ^ ROR32(s->x4.s0.o, 17); + s->x0.s0.e ^= ROR32(t.x0.s0.o, 9); + s->x0.s0.o ^= ROR32(t.x0.s0.e, 10); + s->x1.s0.e ^= ROR32(t.x1.s0.o, 19); + s->x1.s0.o ^= ROR32(t.x1.s0.e, 20); + s->x2.s0.e ^= t.x2.s0.o; + s->x2.s0.o ^= ROR32(t.x2.s0.e, 1); + s->x3.s0.e ^= ROR32(t.x3.s0.e, 5); + s->x3.s0.o ^= ROR32(t.x3.s0.o, 5); + s->x4.s0.e ^= ROR32(t.x4.s0.o, 3); + s->x4.s0.o ^= ROR32(t.x4.s0.e, 4); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/word.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/word.c new file mode 100644 index 0000000..b2dc0f3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/word.c @@ -0,0 +1,40 @@ +#include "word.h" + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +share_t TOBI32(share_t in) { + uint32_t r0, r1; + uint32_t lo = in.e; + uint32_t hi = in.o; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + in.e = r0; + in.o = r1; + return in; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +share_t FROMBI32(share_t in) { + uint32_t r0 = in.e; + uint32_t r1 = in.o; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + in.e = lo; + in.o = hi; + return in; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/word.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/word.h new file mode 100644 index 0000000..6635995 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_2shares/word.h @@ -0,0 +1,122 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" +#include "random.h" + +typedef struct { + uint32_t e; + uint32_t o; +} share_t; + +typedef struct { + share_t s0; + share_t s1; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { + word_t w; + w.s0.e = (uint32_t)x; + w.s0.o = x >> 32; + w.s1.e = 0; + w.s1.o = 0; + return w; +} + +__forceinline uint64_t UINT64_T(word_t w) { + return (uint64_t)w.s0.o << 32 | w.s0.e; +} + +share_t TOBI32(share_t in); + +share_t FROMBI32(share_t in); + +__forceinline word_t TOSHARES(uint64_t in) { + uint32_t r0 = rand32(); + uint32_t r1 = rand32(); + word_t w; + w.s0.e = (uint32_t)in ^ r0; + w.s0.o = (in >> 32) ^ r1; + w.s1.e = r0; + w.s1.o = r1; + return w; +} + +__forceinline uint64_t FROMSHARES(word_t in) { + return (uint64_t)(in.s0.o ^ in.s1.o) << 32 | (in.s0.e ^ in.s1.e); +} + +__forceinline word_t U64TOWORD(uint64_t x) { +#if ASCON_MASK_LOADS + word_t w = TOSHARES(x); + w.s0 = 
TOBI32(w.s0); + w.s1 = TOBI32(w.s1); + return w; +#else + word_t w = WORD_T(x); + w.s0 = TOBI32(w.s0); + return w; +#endif +} + +__forceinline uint64_t WORDTOU64(word_t w) { + w.s0 = FROMBI32(w.s0); + w.s1 = FROMBI32(w.s1); + return (uint64_t)FROMSHARES(w); +} + +#define XOR(a, b) \ + do { \ + word_t tb = b; \ + (a).s0.e ^= tb.s0.e; \ + (a).s0.o ^= tb.s0.o; \ + (a).s1.e ^= tb.s1.e; \ + (a).s1.o ^= tb.s1.o; \ + } while (0) + +#define AND(a, b) /* a &= b, share-wise: a_i & (b_0 ^ b_1), e with e and o with o */ \ + do { \ + word_t ta = a; \ + word_t tb = b; \ + (a).s0.e = (ta.s0.e & tb.s0.e) ^ (ta.s0.e & tb.s1.e); \ + (a).s0.o = (ta.s0.o & tb.s0.o) ^ (ta.s0.o & tb.s1.o); \ + (a).s1.e = (ta.s1.e & tb.s0.e) ^ (ta.s1.e & tb.s1.e); \ + (a).s1.o = (ta.s1.o & tb.s0.o) ^ (ta.s1.o & tb.s1.o); \ + } while (0) + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return x >> n | x << (32 - n); +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.s0.e = lo2hi.s0.e << 16 | hi2lo.s0.e >> 16; + r.s0.o = lo2hi.s0.o << 16 | hi2lo.s0.o >> 16; + r.s1.e = lo2hi.s1.e << 16 | hi2lo.s1.e >> 16; + r.s1.o = lo2hi.s1.o << 16 | hi2lo.s1.o >> 16; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) + result |= ((uint8_t*)&(a.s0))[i] ^ ((uint8_t*)&(a.s1))[i]; + for (int i = 0; i < 8; ++i) + result |= ((uint8_t*)&(b.s0))[i] ^ ((uint8_t*)&(b.s1))[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word (unsigned literal: i == 0 shifts into bit 31) */ +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08u << (28 - 4 * i)) << 32); +} + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return WORD_T((uint64_t)mask << 32 | mask); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/api.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/api.h new file mode 100644 index 0000000..96a7c47 --- /dev/null +++
b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/ascon.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/ascon.c new file mode 100644 index 0000000..9011a77 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/ascon.c @@ -0,0 +1,250 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + XOR(*K1, LOAD64(k)); + XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t N0, N1; + word_t K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* initialization */ + PINIT(s); + XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + XOR(s->x1, K1); + XOR(s->x2, K2); + XOR(s->x3, N0); + XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) XOR(*px, LOAD(ad, adlen)); + XOR(*px, PAD(adlen)); + PB(s); + } + 
XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + XOR(*px, cx); + STORE(m, *px, clen); + AND(*px, XMASK(clen)); + XOR(*px, cx); + } + XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, 
KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} + +#if !ASCON_INLINE_MODE + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + init(s, npub, k); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { final(s, k); } + +#else + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, k); + /* set tag */ + c += mlen; + STORE64(c, s.x3); + STORE64(c + 8, s.x4); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, k); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + XOR(s.x3, LOAD64(c)); + XOR(s.x4, LOAD64(c + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + 
return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/ascon.h new file mode 100644 index 0000000..d11fa01 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/ascon.h @@ -0,0 +1,20 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; + word_t rx; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/config.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/config.h new file mode 100644 index 0000000..e1a4d6e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/config.h @@ -0,0 +1,44 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 0 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'B' +#endif + +/* Ascon mask generator { 'S'tdlib, 'X'orshift, 'R'andombytes } */ +#ifndef ASCON_MASK_RNG +#define ASCON_MASK_RNG 'X' +#endif + +/* mask key/data loads */ +#ifndef ASCON_MASK_LOADS +#define ASCON_MASK_LOADS 0 +#endif + +/* make sure 
__forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/endian.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/implementors b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin 
Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/loadstore.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define 
LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/permutations.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/permutations.c new file mode 100644 index 0000000..1bca2ef --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* 
!ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/permutations.h new file mode 100644 index 0000000..ef338f1 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/permutations.h @@ -0,0 +1,189 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + 
+#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int 
i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git 
a/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/random.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/random.c new file mode 100644 index 0000000..81d0081 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/random.c @@ -0,0 +1,62 @@ + +#include "random.h" + +#include +#include + +#include "config.h" + +#if ASCON_MASK_RNG == 'R' + +void randinit() { srand(time(0)); } + +uint32_t rand32() { + uint32_t r; + randombytes(&r, 4); + return r; +} + +uint64_t rand64() { + uint64_t r; + randombytes(&r, 8); + return r; +} + +#elif ASCON_MASK_RNG == 'S' + +void randinit() { srand(time(0)); } + +uint32_t rand32() { return ((uint32_t)rand() << 21) ^ rand(); } + +uint64_t rand64() { + return ((uint64_t)rand() << 43) ^ ((uint64_t)rand() << 21) ^ rand(); +} + +#elif ASCON_MASK_RNG == 'X' + +uint32_t xorshift32; +uint64_t xorshift64; + +void randinit() { + srand(time(0)); + xorshift32 = rand() | 1; /* nonzero seed: 0 is a fixed point of xorshift (all masks would be 0) */ + xorshift64 = (uint64_t)rand() << 32 | rand() | 1; /* same nonzero requirement for the 64-bit state */ +} + +uint32_t rand32() { + uint32_t x = xorshift32; + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + return xorshift32 = x; +} + +uint64_t rand64() { + uint64_t x = xorshift64; + x ^= x << 13; + x ^= x >> 7; + x ^= x << 17; + return xorshift64 = x; +} + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/random.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/random.h new file mode 100644 index 0000000..89226b6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/random.h @@ -0,0 +1,10 @@ +#ifndef RANDOM_H_ +#define RANDOM_H_ + +#include + +void randinit(); +uint32_t rand32(); +uint64_t rand64(); + +#endif /* RANDOM_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/round.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/round.h new file mode 100644 index 0000000..ddaf2d4 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/round.h @@ -0,0 +1,183 @@ +#ifndef
ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" +#include "random.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = TOSHARES(0); + *K1 = TOSHARES(0); + *K2 = TOSHARES(0); +} + +__forceinline void PINIT(state_t* s) { + randinit(); + s->x0 = TOSHARES(0); + s->x1 = TOSHARES(0); + s->x2 = TOSHARES(0); + s->x3 = TOSHARES(0); + s->x4 = TOSHARES(0); + s->rx = TOSHARES(0); +} + +#define TOFFOLI(a0, a1, a2, b0, b1, b2, c0, c1, c2) \ + do { \ + (a0) ^= (~(b0)) & (c0); \ + (a0) ^= (b0) & (c2); \ + (a0) ^= (b2) & (c0); \ + (a1) ^= (~(b1)) & (c1); \ + (a1) ^= (b1) & (c0); \ + (a1) ^= (b0) & (c1); \ + (a2) ^= (~(b2)) & (c2); \ + (a2) ^= (b2) & (c1); \ + (a2) ^= (b1) & (c2); \ + } while (0) + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + state_t t; + /* refresh randomness */ + /* s->rx = TOSHARES(0); */ + /* addition of round constant */ + s->x2.s0.e ^= C_e; + /* substitution layer */ + s->x0.s0.e ^= s->x4.s0.e; + s->x4.s0.e ^= s->x3.s0.e; + s->x2.s0.e ^= s->x1.s0.e; + s->x0.s1.e ^= s->x4.s1.e; + s->x4.s1.e ^= s->x3.s1.e; + s->x2.s1.e ^= s->x1.s1.e; + s->x0.s2.e ^= s->x4.s2.e; + s->x4.s2.e ^= s->x3.s2.e; + s->x2.s2.e ^= s->x1.s2.e; + /* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */ + s->rx.s2.e = s->rx.s0.e; + s->rx.s0.e ^= s->rx.s1.e; + TOFFOLI(s->rx.s0.e, s->rx.s1.e, s->rx.s2.e, s->x4.s0.e, s->x4.s1.e, + s->x4.s2.e, s->x0.s0.e, s->x0.s1.e, s->x0.s2.e); + TOFFOLI(s->x0.s0.e, s->x0.s1.e, s->x0.s2.e, s->x1.s0.e, s->x1.s1.e, + s->x1.s2.e, s->x2.s0.e, s->x2.s1.e, s->x2.s2.e); + TOFFOLI(s->x2.s0.e, s->x2.s1.e, s->x2.s2.e, s->x3.s0.e, s->x3.s1.e, + s->x3.s2.e, s->x4.s0.e, s->x4.s1.e, s->x4.s2.e); + TOFFOLI(s->x4.s0.e, s->x4.s1.e, s->x4.s2.e, s->x0.s0.e, s->x0.s1.e, + s->x0.s2.e, s->x1.s0.e, s->x1.s1.e, s->x1.s2.e); + TOFFOLI(s->x1.s0.e, s->x1.s1.e, s->x1.s2.e, s->x2.s0.e, s->x2.s1.e, + s->x2.s2.e, s->x3.s0.e, s->x3.s1.e, s->x3.s2.e); + s->x3.s2.e ^= s->rx.s2.e; + s->x3.s1.e ^= 
s->rx.s1.e; + s->x3.s0.e ^= s->rx.s0.e; + /* end of shared keccak s-box */ + s->x1.s0.e ^= s->x0.s0.e; + s->x0.s0.e ^= s->x4.s0.e; + s->x3.s0.e ^= s->x2.s0.e; + s->x2.s0.e = ~s->x2.s0.e; + s->x1.s1.e ^= s->x0.s1.e; + s->x0.s1.e ^= s->x4.s1.e; + s->x3.s1.e ^= s->x2.s1.e; + s->x1.s2.e ^= s->x0.s2.e; + s->x0.s2.e ^= s->x4.s2.e; + s->x3.s2.e ^= s->x2.s2.e; + /* addition of round constant */ + s->x2.s0.o ^= C_o; + /* substitution layer */ + s->x0.s0.o ^= s->x4.s0.o; + s->x4.s0.o ^= s->x3.s0.o; + s->x2.s0.o ^= s->x1.s0.o; + s->x0.s1.o ^= s->x4.s1.o; + s->x4.s1.o ^= s->x3.s1.o; + s->x2.s1.o ^= s->x1.s1.o; + s->x0.s2.o ^= s->x4.s2.o; + s->x4.s2.o ^= s->x3.s2.o; + s->x2.s2.o ^= s->x1.s2.o; + /* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */ + s->rx.s2.o = s->rx.s0.o; + s->rx.s0.o ^= s->rx.s1.o; + TOFFOLI(s->rx.s0.o, s->rx.s1.o, s->rx.s2.o, s->x4.s0.o, s->x4.s1.o, + s->x4.s2.o, s->x0.s0.o, s->x0.s1.o, s->x0.s2.o); + TOFFOLI(s->x0.s0.o, s->x0.s1.o, s->x0.s2.o, s->x1.s0.o, s->x1.s1.o, + s->x1.s2.o, s->x2.s0.o, s->x2.s1.o, s->x2.s2.o); + TOFFOLI(s->x2.s0.o, s->x2.s1.o, s->x2.s2.o, s->x3.s0.o, s->x3.s1.o, + s->x3.s2.o, s->x4.s0.o, s->x4.s1.o, s->x4.s2.o); + TOFFOLI(s->x4.s0.o, s->x4.s1.o, s->x4.s2.o, s->x0.s0.o, s->x0.s1.o, + s->x0.s2.o, s->x1.s0.o, s->x1.s1.o, s->x1.s2.o); + TOFFOLI(s->x1.s0.o, s->x1.s1.o, s->x1.s2.o, s->x2.s0.o, s->x2.s1.o, + s->x2.s2.o, s->x3.s0.o, s->x3.s1.o, s->x3.s2.o); + s->x3.s2.o ^= s->rx.s2.o; + s->x3.s1.o ^= s->rx.s1.o; + s->x3.s0.o ^= s->rx.s0.o; + /* end of shared keccak s-box */ + s->x1.s0.o ^= s->x0.s0.o; + s->x0.s0.o ^= s->x4.s0.o; + s->x3.s0.o ^= s->x2.s0.o; + s->x2.s0.o = ~s->x2.s0.o; + s->x1.s1.o ^= s->x0.s1.o; + s->x0.s1.o ^= s->x4.s1.o; + s->x3.s1.o ^= s->x2.s1.o; + s->x1.s2.o ^= s->x0.s2.o; + s->x0.s2.o ^= s->x4.s2.o; + s->x3.s2.o ^= s->x2.s2.o; + /* linear diffusion layer */ + t.x0.s2.e = s->x0.s2.e ^ ROR32(s->x0.s2.o, 4); + t.x0.s2.o = s->x0.s2.o ^ ROR32(s->x0.s2.e, 5); + t.x1.s2.e = s->x1.s2.e ^ ROR32(s->x1.s2.e, 
11); + t.x1.s2.o = s->x1.s2.o ^ ROR32(s->x1.s2.o, 11); + t.x2.s2.e = s->x2.s2.e ^ ROR32(s->x2.s2.o, 2); + t.x2.s2.o = s->x2.s2.o ^ ROR32(s->x2.s2.e, 3); + t.x3.s2.e = s->x3.s2.e ^ ROR32(s->x3.s2.o, 3); + t.x3.s2.o = s->x3.s2.o ^ ROR32(s->x3.s2.e, 4); + t.x4.s2.e = s->x4.s2.e ^ ROR32(s->x4.s2.e, 17); + t.x4.s2.o = s->x4.s2.o ^ ROR32(s->x4.s2.o, 17); + s->x0.s2.e ^= ROR32(t.x0.s2.o, 9); + s->x0.s2.o ^= ROR32(t.x0.s2.e, 10); + s->x1.s2.e ^= ROR32(t.x1.s2.o, 19); + s->x1.s2.o ^= ROR32(t.x1.s2.e, 20); + s->x2.s2.e ^= t.x2.s2.o; + s->x2.s2.o ^= ROR32(t.x2.s2.e, 1); + s->x3.s2.e ^= ROR32(t.x3.s2.e, 5); + s->x3.s2.o ^= ROR32(t.x3.s2.o, 5); + s->x4.s2.e ^= ROR32(t.x4.s2.o, 3); + s->x4.s2.o ^= ROR32(t.x4.s2.e, 4); + t.x0.s1.e = s->x0.s1.e ^ ROR32(s->x0.s1.o, 4); + t.x0.s1.o = s->x0.s1.o ^ ROR32(s->x0.s1.e, 5); + t.x1.s1.e = s->x1.s1.e ^ ROR32(s->x1.s1.e, 11); + t.x1.s1.o = s->x1.s1.o ^ ROR32(s->x1.s1.o, 11); + t.x2.s1.e = s->x2.s1.e ^ ROR32(s->x2.s1.o, 2); + t.x2.s1.o = s->x2.s1.o ^ ROR32(s->x2.s1.e, 3); + t.x3.s1.e = s->x3.s1.e ^ ROR32(s->x3.s1.o, 3); + t.x3.s1.o = s->x3.s1.o ^ ROR32(s->x3.s1.e, 4); + t.x4.s1.e = s->x4.s1.e ^ ROR32(s->x4.s1.e, 17); + t.x4.s1.o = s->x4.s1.o ^ ROR32(s->x4.s1.o, 17); + s->x0.s1.e ^= ROR32(t.x0.s1.o, 9); + s->x0.s1.o ^= ROR32(t.x0.s1.e, 10); + s->x1.s1.e ^= ROR32(t.x1.s1.o, 19); + s->x1.s1.o ^= ROR32(t.x1.s1.e, 20); + s->x2.s1.e ^= t.x2.s1.o; + s->x2.s1.o ^= ROR32(t.x2.s1.e, 1); + s->x3.s1.e ^= ROR32(t.x3.s1.e, 5); + s->x3.s1.o ^= ROR32(t.x3.s1.o, 5); + s->x4.s1.e ^= ROR32(t.x4.s1.o, 3); + s->x4.s1.o ^= ROR32(t.x4.s1.e, 4); + t.x0.s0.e = s->x0.s0.e ^ ROR32(s->x0.s0.o, 4); + t.x0.s0.o = s->x0.s0.o ^ ROR32(s->x0.s0.e, 5); + t.x1.s0.e = s->x1.s0.e ^ ROR32(s->x1.s0.e, 11); + t.x1.s0.o = s->x1.s0.o ^ ROR32(s->x1.s0.o, 11); + t.x2.s0.e = s->x2.s0.e ^ ROR32(s->x2.s0.o, 2); + t.x2.s0.o = s->x2.s0.o ^ ROR32(s->x2.s0.e, 3); + t.x3.s0.e = s->x3.s0.e ^ ROR32(s->x3.s0.o, 3); + t.x3.s0.o = s->x3.s0.o ^ ROR32(s->x3.s0.e, 4); + t.x4.s0.e = s->x4.s0.e ^ 
ROR32(s->x4.s0.e, 17); + t.x4.s0.o = s->x4.s0.o ^ ROR32(s->x4.s0.o, 17); + s->x0.s0.e ^= ROR32(t.x0.s0.o, 9); + s->x0.s0.o ^= ROR32(t.x0.s0.e, 10); + s->x1.s0.e ^= ROR32(t.x1.s0.o, 19); + s->x1.s0.o ^= ROR32(t.x1.s0.e, 20); + s->x2.s0.e ^= t.x2.s0.o; + s->x2.s0.o ^= ROR32(t.x2.s0.e, 1); + s->x3.s0.e ^= ROR32(t.x3.s0.e, 5); + s->x3.s0.o ^= ROR32(t.x3.s0.o, 5); + s->x4.s0.e ^= ROR32(t.x4.s0.o, 3); + s->x4.s0.o ^= ROR32(t.x4.s0.e, 4); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/word.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/word.c new file mode 100644 index 0000000..b2dc0f3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/word.c @@ -0,0 +1,40 @@ +#include "word.h" + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +share_t TOBI32(share_t in) { + uint32_t r0, r1; + uint32_t lo = in.e; + uint32_t hi = in.o; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + in.e = r0; + in.o = r1; + return in; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +share_t FROMBI32(share_t in) { + uint32_t r0 = in.e; + uint32_t r1 = in.o; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + in.e = lo; + in.o = hi; + return in; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/word.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/word.h new file mode 100644 index 0000000..45c250d --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_3shares/word.h @@ -0,0 +1,146 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" +#include "random.h" + +typedef struct { + uint32_t e; + uint32_t o; +} share_t; + +typedef struct { + share_t s0; + share_t s1; + share_t s2; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { + word_t w; + w.s0.e = (uint32_t)x; + w.s0.o = x >> 32; + w.s1.e = 0; + w.s1.o = 0; + w.s2.e = 0; + w.s2.o = 0; + return w; +} + +__forceinline uint64_t UINT64_T(word_t w) { + return (uint64_t)w.s0.o << 32 | w.s0.e; +} + +share_t TOBI32(share_t in); + +share_t FROMBI32(share_t in); + +__forceinline word_t TOSHARES(uint64_t in) { + uint32_t r0 = rand32(); + uint32_t r1 = rand32(); + uint32_t r2 = rand32(); + uint32_t r3 = rand32(); + word_t w; + w.s0.e = (uint32_t)in ^ r0 ^ r2; + w.s0.o = (in >> 32) ^ r1 ^ r3; + w.s1.e = r0; + w.s1.o = r1; + w.s2.e = r2; + w.s2.o = r3; + return w; +} + +__forceinline uint64_t FROMSHARES(word_t in) { + return (uint64_t)(in.s0.o ^ in.s1.o ^ in.s2.o) << 
32 | + (in.s0.e ^ in.s1.e ^ in.s2.e); +} + +__forceinline word_t U64TOWORD(uint64_t x) { +#if ASCON_MASK_LOADS + word_t w = TOSHARES(x); + w.s0 = TOBI32(w.s0); + w.s1 = TOBI32(w.s1); + w.s2 = TOBI32(w.s2); + return w; +#else + word_t w = WORD_T(x); + w.s0 = TOBI32(w.s0); + return w; +#endif +} + +__forceinline uint64_t WORDTOU64(word_t w) { + w.s0 = FROMBI32(w.s0); + w.s1 = FROMBI32(w.s1); + w.s2 = FROMBI32(w.s2); + return (uint64_t)FROMSHARES(w); +} + +#define XOR(a, b) \ + do { \ + word_t tb = b; \ + (a).s0.e ^= tb.s0.e; \ + (a).s0.o ^= tb.s0.o; \ + (a).s1.e ^= tb.s1.e; \ + (a).s1.o ^= tb.s1.o; \ + (a).s2.e ^= tb.s2.e; \ + (a).s2.o ^= tb.s2.o; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t ta = a; \ + word_t tb = b; \ + (a).s0.e = \ + (ta.s0.e & tb.s0.e) ^ (ta.s0.e & tb.s1.e) ^ (ta.s0.e & tb.s2.e); \ + (a).s0.o = \ + (ta.s0.o & tb.s0.o) ^ (ta.s0.o & tb.s1.o) ^ (ta.s0.o & tb.s2.o); \ + (a).s1.e = \ + (ta.s1.e & tb.s0.e) ^ (ta.s1.e & tb.s1.e) ^ (ta.s1.e & tb.s2.e); \ + (a).s1.o = \ + (ta.s1.o & tb.s0.o) ^ (ta.s1.o & tb.s1.o) ^ (ta.s1.o & tb.s2.o); \ + (a).s2.e = \ + (ta.s2.e & tb.s0.e) ^ (ta.s2.e & tb.s1.e) ^ (ta.s2.e & tb.s2.e); \ + (a).s2.o = \ + (ta.s2.o & tb.s0.o) ^ (ta.s2.o & tb.s1.o) ^ (ta.s2.o & tb.s2.o); \ + } while (0) + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return x >> n | x << (32 - n); +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.s0.e = lo2hi.s0.e << 16 | hi2lo.s0.e >> 16; + r.s0.o = lo2hi.s0.o << 16 | hi2lo.s0.o >> 16; + r.s1.e = lo2hi.s1.e << 16 | hi2lo.s1.e >> 16; + r.s1.o = lo2hi.s1.o << 16 | hi2lo.s1.o >> 16; + r.s2.e = lo2hi.s2.e << 16 | hi2lo.s2.e >> 16; + r.s2.o = lo2hi.s2.o << 16 | hi2lo.s2.o >> 16; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) + result |= ((uint8_t*)&(a.s0))[i] ^ ((uint8_t*)&(a.s1))[i] ^ + ((uint8_t*)&(a.s2))[i]; + for (int i = 0; i < 8; ++i) + result |= ((uint8_t*)&(b.s0))[i] ^ ((uint8_t*)&(b.s1))[i] 
^ + ((uint8_t*)&(b.s2))[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); +} + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return WORD_T((uint64_t)mask << 32 | mask); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/api.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/api.h index a4aa567..96a7c47 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/api.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/api.h @@ -3,3 +3,4 @@ #define CRYPTO_NPUBBYTES 16 #define CRYPTO_ABYTES 16 #define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/ascon.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/ascon.c new file mode 100644 index 0000000..9011a77 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/ascon.c @@ -0,0 +1,250 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + XOR(*K1, LOAD64(k)); + XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t N0, N1; + word_t K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* initialization */ + PINIT(s); + XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + XOR(s->x1, K1); + XOR(s->x2, K2); + XOR(s->x3, N0); + XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); +} 
+ +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) XOR(*px, LOAD(ad, adlen)); + XOR(*px, PAD(adlen)); + PB(s); + } + XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + 
XOR(*px, cx); + STORE(m, *px, clen); + AND(*px, XMASK(clen)); + XOR(*px, cx); + } + XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} + +#if !ASCON_INLINE_MODE + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + init(s, npub, k); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { final(s, k); } + +#else + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, k); + /* set tag */ + c += mlen; + STORE64(c, s.x3); + STORE64(c + 8, s.x4); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* 
nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, k); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + XOR(s.x3, LOAD64(c)); + XOR(s.x4, LOAD64(c + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/ascon.h new file mode 100644 index 0000000..10a5b6e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/config.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/config.h new file mode 100644 index 0000000..4242e2e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 
0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'H' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/endian.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/endian.h index ffbdcd9..3944360 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/endian.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/endian.h @@ -3,29 +3,37 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) #elif defined(_MSC_VER) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ - (((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ - (((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ - (((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + 
((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) #define U32BIG(x) \ - ((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ - (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) -#define U16BIG(x) \ - ((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8)) + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ - +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/loadstore.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + 
*(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/permutations.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/permutations.c new file mode 
100644 index 0000000..1bca2ef --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/permutations.h index 08b8d75..ef338f1 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/permutations.h @@ -1,157 +1,189 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -#include "endian.h" - -typedef unsigned char u8; -typedef unsigned int u32; -typedef unsigned long long u64; - -typedef struct { - u32 e; - u32 o; -} u32_2; - -#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) - -// Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 -#define to_bit_interleaving(out, in) \ - do { \ - u32 hi = (in) >> 32; \ - u32 lo = (u32)(in); \ - u32 r0, r1; \ - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \ - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \ - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \ - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \ - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \ - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \ - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \ - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \ - (out).e = (lo & 0x0000FFFF) | (hi << 16); \ - (out).o = (lo >> 16) | (hi & 0xFFFF0000); \ - } while (0) - -// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -#define from_bit_interleaving(out, in) \ - do { \ - u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16); \ - u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000); \ - u32 r0, r1; \ - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \ - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \ - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \ - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \ - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \ - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \ - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \ - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \ - out = (u64)hi << 32 | lo; \ - } while (0) - -#define ROUND(C_e, C_o) \ - do { \ - u32 reg0, reg1, reg2, reg3; \ - __asm__ __volatile__ ( \ - "eor %[x2_e], %[x2_e], #" #C_e "\n\t" \ - "eor %[x2_o], %[x2_o], #" #C_o "\n\t" \ - "eor %[x0_e], %[x0_e], %[x4_e]\n\t" \ - "eor %[x0_o], %[x0_o], %[x4_o]\n\t" \ - "eor %[x4_e], %[x4_e], %[x3_e]\n\t" \ - "eor %[x4_o], %[x4_o], %[x3_o]\n\t" \ - "eor %[x2_e], %[x2_e], %[x1_e]\n\t" \ - "eor %[x2_o], %[x2_o], %[x1_o]\n\t" \ - "bic %[reg0], %[x0_e], 
%[x4_e]\n\t" \ - "bic %[reg1], %[x4_e], %[x3_e]\n\t" \ - "bic %[reg2], %[x2_e], %[x1_e]\n\t" \ - "bic %[reg3], %[x1_e], %[x0_e]\n\t" \ - "eor %[x2_e], %[x2_e], %[reg1]\n\t" \ - "eor %[x0_e], %[x0_e], %[reg2]\n\t" \ - "eor %[x4_e], %[x4_e], %[reg3]\n\t" \ - "bic %[reg3], %[x3_e], %[x2_e]\n\t" \ - "eor %[x3_e], %[x3_e], %[reg0]\n\t" \ - "bic %[reg2], %[x0_o], %[x4_o]\n\t" \ - "bic %[reg0], %[x2_o], %[x1_o]\n\t" \ - "bic %[reg1], %[x4_o], %[x3_o]\n\t" \ - "eor %[x1_e], %[x1_e], %[reg3]\n\t" \ - "eor %[x0_o], %[x0_o], %[reg0]\n\t" \ - "eor %[x2_o], %[x2_o], %[reg1]\n\t" \ - "bic %[reg3], %[x1_o], %[x0_o]\n\t" \ - "bic %[reg0], %[x3_o], %[x2_o]\n\t" \ - "eor %[x3_o], %[x3_o], %[reg2]\n\t" \ - "eor %[x3_o], %[x3_o], %[x2_o]\n\t" \ - "eor %[x4_o], %[x4_o], %[reg3]\n\t" \ - "eor %[x1_o], %[x1_o], %[reg0]\n\t" \ - "eor %[x3_e], %[x3_e], %[x2_e]\n\t" \ - "eor %[x1_e], %[x1_e], %[x0_e]\n\t" \ - "eor %[x1_o], %[x1_o], %[x0_o]\n\t" \ - "eor %[x0_e], %[x0_e], %[x4_e]\n\t" \ - "eor %[x0_o], %[x0_o], %[x4_o]\n\t" \ - "mvn %[x2_e], %[x2_e]\n\t" \ - "mvn %[x2_o], %[x2_o]\n\t" \ - "eor %[reg0], %[x0_e], %[x0_o], ror #4\n\t" \ - "eor %[reg1], %[x0_o], %[x0_e], ror #5\n\t" \ - "eor %[reg2], %[x1_e], %[x1_e], ror #11\n\t" \ - "eor %[reg3], %[x1_o], %[x1_o], ror #11\n\t" \ - "eor %[x0_e], %[x0_e], %[reg1], ror #9\n\t" \ - "eor %[x0_o], %[x0_o], %[reg0], ror #10\n\t" \ - "eor %[x1_e], %[x1_e], %[reg3], ror #19\n\t" \ - "eor %[x1_o], %[x1_o], %[reg2], ror #20\n\t" \ - "eor %[reg0], %[x2_e], %[x2_o], ror #2\n\t" \ - "eor %[reg1], %[x2_o], %[x2_e], ror #3\n\t" \ - "eor %[reg2], %[x3_e], %[x3_o], ror #3\n\t" \ - "eor %[reg3], %[x3_o], %[x3_e], ror #4\n\t" \ - "eor %[x2_e], %[x2_e], %[reg1]\n\t" \ - "eor %[x2_o], %[x2_o], %[reg0], ror #1\n\t" \ - "eor %[x3_e], %[x3_e], %[reg2], ror #5\n\t" \ - "eor %[x3_o], %[x3_o], %[reg3], ror #5\n\t" \ - "eor %[reg0], %[x4_e], %[x4_e], ror #17\n\t" \ - "eor %[reg1], %[x4_o], %[x4_o], ror #17\n\t" \ - "eor %[x4_e], %[x4_e], %[reg1], ror #3\n\t" \ - "eor 
/*
 * Ascon-128a initialization word: key size in bits at shift 56, rate in bits
 * at shift 48, pa round count at shift 40 and pb round count at shift 32.
 * Uses the 128a key-size constant so the definition follows its own variant
 * (same value as ASCON_128_KEYBYTES).
 */
#define ASCON_128A_IV                                       \
  U64TOWORD(((uint64_t)(ASCON_128A_KEYBYTES * 8) << 56) |   \
            ((uint64_t)(ASCON_128A_RATE * 8) << 48) |       \
            ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) |       \
            ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32))
/*
 * Index of the first round constant to use for an n-round permutation
 * (the table holds 12 entries and a permutation uses the last n of them).
 * The argument is parenthesized so that expressions such as START(a + b)
 * expand to 12 - (a + b).
 */
#define START(n) (12 - (n))
0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_arm/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + 
/*
 * One permutation round on the bit-interleaved state, written as a single
 * ARM inline-assembly block.
 *
 * s:   state; the e/o halves of x0..x4 are read-write register operands
 * C_e: value XORed into x2.e by the first instruction
 * C_o: value XORed into x2.o by the second instruction
 *
 * Instruction groups, in order:
 *   - two eor with C_e / C_o on x2 (round constant);
 *   - eor/bic/mvn sequence mixing x0..x4, even and odd halves interleaved
 *     (presumably the substitution layer - compare the commented C version
 *     of the round in the 3-share implementation);
 *   - eor with "ror #k" operands, combining each word with rotated copies of
 *     its own halves (presumably the linear diffusion layer).
 *
 * C_e and C_o use the "i" (immediate) constraint, so they must be
 * compile-time constants at the point of the asm statement; this depends on
 * the function being inlined with constant arguments.
 * NOTE(review): callers that pass constants[i][0] / constants[i][1] from a
 * loop may not satisfy "i" - confirm those configurations build. Relaxing the
 * constraint to a register is not a drop-in change: the block already uses
 * 14 register operands (10 "+r" and 4 "=r").
 *
 * The tmp outputs are plain "=r" (no early-clobber); both inputs are consumed
 * by the first two instructions, before any tmp is written.
 */
__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) {
  uint32_t tmp0, tmp1, tmp2, tmp3;
  /* clang-format off */
  __asm__ __volatile__( \
      "eor %[x2_e], %[x2_e], %[C_e]\n\t" \
      "eor %[x2_o], %[x2_o], %[C_o]\n\t" \
      "eor %[x0_e], %[x0_e], %[x4_e]\n\t" \
      "eor %[x0_o], %[x0_o], %[x4_o]\n\t" \
      "eor %[x4_e], %[x4_e], %[x3_e]\n\t" \
      "eor %[x4_o], %[x4_o], %[x3_o]\n\t" \
      "eor %[x2_e], %[x2_e], %[x1_e]\n\t" \
      "eor %[x2_o], %[x2_o], %[x1_o]\n\t" \
      "bic %[tmp0], %[x0_e], %[x4_e]\n\t" \
      "bic %[tmp1], %[x4_e], %[x3_e]\n\t" \
      "bic %[tmp2], %[x2_e], %[x1_e]\n\t" \
      "bic %[tmp3], %[x1_e], %[x0_e]\n\t" \
      "eor %[x2_e], %[x2_e], %[tmp1]\n\t" \
      "eor %[x0_e], %[x0_e], %[tmp2]\n\t" \
      "eor %[x4_e], %[x4_e], %[tmp3]\n\t" \
      "bic %[tmp3], %[x3_e], %[x2_e]\n\t" \
      "eor %[x3_e], %[x3_e], %[tmp0]\n\t" \
      "bic %[tmp2], %[x0_o], %[x4_o]\n\t" \
      "bic %[tmp0], %[x2_o], %[x1_o]\n\t" \
      "bic %[tmp1], %[x4_o], %[x3_o]\n\t" \
      "eor %[x1_e], %[x1_e], %[tmp3]\n\t" \
      "eor %[x0_o], %[x0_o], %[tmp0]\n\t" \
      "eor %[x2_o], %[x2_o], %[tmp1]\n\t" \
      "bic %[tmp3], %[x1_o], %[x0_o]\n\t" \
      "bic %[tmp0], %[x3_o], %[x2_o]\n\t" \
      "eor %[x3_o], %[x3_o], %[tmp2]\n\t" \
      "eor %[x3_o], %[x3_o], %[x2_o]\n\t" \
      "eor %[x4_o], %[x4_o], %[tmp3]\n\t" \
      "eor %[x1_o], %[x1_o], %[tmp0]\n\t" \
      "eor %[x3_e], %[x3_e], %[x2_e]\n\t" \
      "eor %[x1_e], %[x1_e], %[x0_e]\n\t" \
      "eor %[x1_o], %[x1_o], %[x0_o]\n\t" \
      "eor %[x0_e], %[x0_e], %[x4_e]\n\t" \
      "eor %[x0_o], %[x0_o], %[x4_o]\n\t" \
      "mvn %[x2_e], %[x2_e]\n\t" \
      "mvn %[x2_o], %[x2_o]\n\t" \
      "eor %[tmp0], %[x0_e], %[x0_o], ror #4\n\t" \
      "eor %[tmp1], %[x0_o], %[x0_e], ror #5\n\t" \
      "eor %[tmp2], %[x1_e], %[x1_e], ror #11\n\t" \
      "eor %[tmp3], %[x1_o], %[x1_o], ror #11\n\t" \
      "eor %[x0_e], %[x0_e], %[tmp1], ror #9\n\t" \
      "eor %[x0_o], %[x0_o], %[tmp0], ror #10\n\t" \
      "eor %[x1_e], %[x1_e], %[tmp3], ror #19\n\t" \
      "eor %[x1_o], %[x1_o], %[tmp2], ror #20\n\t" \
      "eor %[tmp0], %[x2_e], %[x2_o], ror #2\n\t" \
      "eor %[tmp1], %[x2_o], %[x2_e], ror #3\n\t" \
      "eor %[tmp2], %[x3_e], %[x3_o], ror #3\n\t" \
      "eor %[tmp3], %[x3_o], %[x3_e], ror #4\n\t" \
      "eor %[x2_e], %[x2_e], %[tmp1]\n\t" \
      "eor %[x2_o], %[x2_o], %[tmp0], ror #1\n\t" \
      "eor %[x3_e], %[x3_e], %[tmp2], ror #5\n\t" \
      "eor %[x3_o], %[x3_o], %[tmp3], ror #5\n\t" \
      "eor %[tmp0], %[x4_e], %[x4_e], ror #17\n\t" \
      "eor %[tmp1], %[x4_o], %[x4_o], ror #17\n\t" \
      "eor %[x4_e], %[x4_e], %[tmp1], ror #3\n\t" \
      "eor %[x4_o], %[x4_o], %[tmp0], ror #4\n\t" \
      : [ x0_e ] "+r"(s->x0.e), \
        [ x1_e ] "+r"(s->x1.e), \
        [ x2_e ] "+r"(s->x2.e), \
        [ x3_e ] "+r"(s->x3.e), \
        [ x4_e ] "+r"(s->x4.e), \
        [ x0_o ] "+r"(s->x0.o), \
        [ x1_o ] "+r"(s->x1.o), \
        [ x2_o ] "+r"(s->x2.o), \
        [ x3_o ] "+r"(s->x3.o), \
        [ x4_o ] "+r"(s->x4.o), \
        [ tmp0 ] "=r"(tmp0), \
        [ tmp1 ] "=r"(tmp1), \
        [ tmp2 ] "=r"(tmp2), \
        [ tmp3 ] "=r"(tmp3) \
      : [ C_e ] "i"(C_e), \
        [ C_o ] "i"(C_o) \
      : );
  /* clang-format on */
  printstate(" round output", s);
}
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); +} + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return WORD_T((uint64_t)mask << 32 | mask); +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t TOBI32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t FROMBI32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + return (uint64_t)hi << 32 | lo; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/api.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/api.h index a4aa567..96a7c47 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/api.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/api.h @@ -3,3 +3,4 @@ #define CRYPTO_NPUBBYTES 16 #define CRYPTO_ABYTES 16 #define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/ascon.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/ascon.c new file mode 100644 index 0000000..9011a77 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/ascon.c @@ -0,0 +1,250 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + XOR(*K1, LOAD64(k)); + XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t N0, N1; + word_t K0, K1, K2; + /* 
load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* initialization */ + PINIT(s); + XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + XOR(s->x1, K1); + XOR(s->x2, K2); + XOR(s->x3, N0); + XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) XOR(*px, LOAD(ad, adlen)); + XOR(*px, PAD(adlen)); + PB(s); + } + XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { 
+ cx = LOAD64(c + 8); + XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + XOR(*px, cx); + STORE(m, *px, clen); + AND(*px, XMASK(clen)); + XOR(*px, cx); + } + XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} + +#if !ASCON_INLINE_MODE + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + init(s, npub, k); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { final(s, k); } + +#else + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t 
adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, k); + /* set tag */ + c += mlen; + STORE64(c, s.x3); + STORE64(c + 8, s.x4); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, k); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + XOR(s.x3, LOAD64(c)); + XOR(s.x4, LOAD64(c + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/ascon.h new file mode 100644 index 0000000..10a5b6e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/config.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/config.h new file mode 100644 index 0000000..5ccce77 --- /dev/null 
+++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'M' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/endian.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/endian.h index ffbdcd9..3944360 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/endian.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/endian.h @@ -3,29 +3,37 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) #elif defined(_MSC_VER) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ - (((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ - (((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ - (((x) & 
0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) #define U32BIG(x) \ - ((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ - (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) -#define U16BIG(x) \ - ((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8)) + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ - +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/loadstore.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const 
uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* 
bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/permutations.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/permutations.c new file mode 100644 index 0000000..1bca2ef --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/permutations.h index 10ae468..ef338f1 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/permutations.h @@ -1,134 +1,189 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -#include "endian.h" - -typedef unsigned char u8; -typedef unsigned int u32; -typedef unsigned long long u64; - -typedef struct { - u32 e; - u32 o; -} u32_2; - -#define 
EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) -#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) - -// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -#define to_bit_interleaving(out, in) \ - do { \ - u32 hi = (in) >> 32; \ - u32 lo = (u32)(in); \ - u32 r0, r1; \ - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \ - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \ - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \ - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \ - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \ - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \ - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \ - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \ - (out).e = (lo & 0x0000FFFF) | (hi << 16); \ - (out).o = (lo >> 16) | (hi & 0xFFFF0000); \ - } while (0) - -// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -#define from_bit_interleaving(out, in) \ - do { \ - u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16); \ - u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000); \ - u32 r0, r1; \ - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \ - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \ - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \ - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \ - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \ - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \ - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \ - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \ - out = (u64)hi << 32 | lo; \ - } while (0) - -#define ROUND(C_e, C_o) \ - do { \ - /* round constant */ \ - x2.e ^= C_e; x2.o ^= C_o; \ - /* s-box layer */ \ - x0.e ^= x4.e; x0.o ^= x4.o; \ - x4.e ^= x3.e; x4.o ^= x3.o; \ - x2.e ^= x1.e; x2.o ^= x1.o; \ - t0.e = x0.e & (~x4.e); t0.o = x0.o & (~x4.o); 
\ - x0.e ^= x2.e & (~x1.e); x0.o ^= x2.o & (~x1.o); \ - x2.e ^= x4.e & (~x3.e); x2.o ^= x4.o & (~x3.o); \ - x4.e ^= x1.e & (~x0.e); x4.o ^= x1.o & (~x0.o); \ - x1.e ^= x3.e & (~x2.e); x1.o ^= x3.o & (~x2.o); \ - x3.e ^= t0.e; x3.o ^= t0.o; \ - x1.e ^= x0.e; x1.o ^= x0.o; \ - x3.e ^= x2.e; x3.o ^= x2.o; \ - x0.e ^= x4.e; x0.o ^= x4.o; \ - /* linear layer */ \ - t0.e = x0.e ^ ROTR32(x0.o, 4); \ - t0.o = x0.o ^ ROTR32(x0.e, 5); \ - x0.e ^= ROTR32(t0.o, 9); \ - x0.o ^= ROTR32(t0.e, 10); \ - t0.e = x1.e ^ ROTR32(x1.e, 11); \ - t0.o = x1.o ^ ROTR32(x1.o, 11); \ - x1.e ^= ROTR32(t0.o, 19); \ - x1.o ^= ROTR32(t0.e, 20); \ - t0.e = x2.e ^ ROTR32(x2.o, 2); \ - t0.o = x2.o ^ ROTR32(x2.e, 3); \ - x2.e ^= t0.o; \ - x2.o ^= ROTR32(t0.e, 1); \ - t0.e = x3.e ^ ROTR32(x3.o, 3); \ - t0.o = x3.o ^ ROTR32(x3.e, 4); \ - x3.e ^= ROTR32(t0.e, 5); \ - x3.o ^= ROTR32(t0.o, 5); \ - t0.e = x4.e ^ ROTR32(x4.e, 17); \ - t0.o = x4.o ^ ROTR32(x4.o, 17); \ - x4.e ^= ROTR32(t0.o, 3); \ - x4.o ^= ROTR32(t0.e, 4); \ - x2.e = ~x2.e; x2.o = ~x2.o; \ - } while(0) - -#define P12() \ - do { \ - ROUND(0xc, 0xc); \ - ROUND(0x9, 0xc); \ - ROUND(0xc, 0x9); \ - ROUND(0x9, 0x9); \ - ROUND(0x6, 0xc); \ - ROUND(0x3, 0xc); \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#define P8() \ - do { \ - ROUND(0x6, 0xc); \ - ROUND(0x3, 0xc); \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#define P6() \ - do { \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#endif // PERMUTATIONS_H_ +#include +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define 
ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 
+#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + 
+__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/round.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/round.h new file mode 100644 index 0000000..bc7a0cd --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/round.h @@ -0,0 +1,77 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + uint32_t tmp_e, tmp_o; + /* round constant */ + s->x2.e ^= C_e; + s->x2.o ^= C_o; + /* s-box layer */ + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + s->x4.e ^= s->x3.e; + s->x4.o ^= s->x3.o; + s->x2.e ^= s->x1.e; + s->x2.o ^= s->x1.o; + 
tmp_e = s->x0.e & (~s->x4.e); + tmp_o = s->x0.o & (~s->x4.o); + s->x0.e ^= s->x2.e & (~s->x1.e); + s->x0.o ^= s->x2.o & (~s->x1.o); + s->x2.e ^= s->x4.e & (~s->x3.e); + s->x2.o ^= s->x4.o & (~s->x3.o); + s->x4.e ^= s->x1.e & (~s->x0.e); + s->x4.o ^= s->x1.o & (~s->x0.o); + s->x1.e ^= s->x3.e & (~s->x2.e); + s->x1.o ^= s->x3.o & (~s->x2.o); + s->x3.e ^= tmp_e; + s->x3.o ^= tmp_o; + s->x1.e ^= s->x0.e; + s->x1.o ^= s->x0.o; + s->x3.e ^= s->x2.e; + s->x3.o ^= s->x2.o; + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + /* linear layer */ + tmp_e = s->x0.e ^ ROR32(s->x0.o, 4); + tmp_o = s->x0.o ^ ROR32(s->x0.e, 5); + s->x0.e ^= ROR32(tmp_o, 9); + s->x0.o ^= ROR32(tmp_e, 10); + tmp_e = s->x1.e ^ ROR32(s->x1.e, 11); + tmp_o = s->x1.o ^ ROR32(s->x1.o, 11); + s->x1.e ^= ROR32(tmp_o, 19); + s->x1.o ^= ROR32(tmp_e, 20); + tmp_e = s->x2.e ^ ROR32(s->x2.o, 2); + tmp_o = s->x2.o ^ ROR32(s->x2.e, 3); + s->x2.e ^= tmp_o; + s->x2.o ^= ROR32(tmp_e, 1); + tmp_e = s->x3.e ^ ROR32(s->x3.o, 3); + tmp_o = s->x3.o ^ ROR32(s->x3.e, 4); + s->x3.e ^= ROR32(tmp_e, 5); + s->x3.o ^= ROR32(tmp_o, 5); + tmp_e = s->x4.e ^ ROR32(s->x4.e, 17); + tmp_o = s->x4.o ^ ROR32(s->x4.o, 17); + s->x4.e ^= ROR32(tmp_o, 3); + s->x4.o ^= ROR32(tmp_e, 4); + s->x2.e = ~s->x2.e; + s->x2.o = ~s->x2.o; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/word.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/word.h new file mode 100644 index 0000000..8ffcaaa --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowreg/word.h @@ -0,0 +1,117 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef struct { + uint32_t e; + uint32_t o; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { + return (word_t){.o = x >> 32, .e = x}; +} + +__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } + +__forceinline uint64_t TOBI32(uint64_t in); + +__forceinline uint64_t 
FROMBI32(uint64_t in); + +__forceinline word_t U64TOWORD(uint64_t x) { + uint64_t w = TOBI32(x); + return (word_t){.o = w >> 32, .e = w}; +} + +__forceinline uint64_t WORDTOU64(word_t w) { + return FROMBI32((uint64_t)w.o << 32 | w.e); +} + +#define XOR(a, b) \ + do { \ + word_t tb = b; \ + (a).e ^= tb.e; \ + (a).o ^= tb.o; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t tb = b; \ + (a).e &= tb.e; \ + (a).o &= tb.o; \ + } while (0) + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); +} + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return WORD_T((uint64_t)mask << 32 | mask); +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t TOBI32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t FROMBI32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + return (uint64_t)hi << 32 | lo; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/api.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/api.h index a4aa567..96a7c47 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/api.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/api.h @@ -3,3 +3,4 @@ #define CRYPTO_NPUBBYTES 16 #define CRYPTO_ABYTES 16 #define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 8 
diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/ascon.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/ascon.c new file mode 100644 index 0000000..5e60f1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/ascon.c @@ -0,0 +1,63 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode) { + word_t N0, N1, K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + if (CRYPTO_KEYBYTES == 20) { + K0 = KEYROT(WORD_T(0), LOAD(k, 4)); + k += 4; + } + K1 = LOAD64(k); + K2 = LOAD64(k + 8); + /* initialization */ + s->x0 = IV; + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + s->x1 = K1; + s->x2 = K2; + s->x3 = N0; + s->x4 = N1; + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); + /* process associated data */ + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_AD); + PB(s); + } + XOR(s->x4, WORD_T(1)); + /* process plaintext/ciphertext */ + process_data(s, out, in, tlen, mode); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/ascon.h new file mode 100644 index 
0000000..aa685d3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/ascon.h @@ -0,0 +1,24 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +#define ASCON_AD 0 +#define ASCON_ENC 1 +#define ASCON_DEC 2 + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/config.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/config.h new file mode 100644 index 0000000..19426ab --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'B' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/decrypt.c index 7e9dd1a..605bd03 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/decrypt.c +++ 
b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/decrypt.c @@ -1,32 +1,32 @@ -#include "core.h" +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" -int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, - unsigned char* nsec, const unsigned char* c, - unsigned long long clen, const unsigned char* ad, - unsigned long long adlen, const unsigned char* npub, - const unsigned char* k) { +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { if (clen < CRYPTO_ABYTES) { *mlen = 0; return -1; } - - state s; - u32_2 t0, t1; + state_t s; (void)nsec; - - // set plaintext size + /* set plaintext size */ *mlen = clen - CRYPTO_ABYTES; - + /* ascon decryption */ ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); - - // verify tag (should be constant time, check compiler output) - t0 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen))); - t1 = to_bit_interleaving(U64BIG(*(u64*)(c + *mlen + 8))); - if (((s.x3.e ^ t0.e) | (s.x3.o ^ t0.o) | (s.x4.e ^ t1.e) | (s.x4.o ^ t1.o)) != - 0) { + /* verify tag (should be constant time, check compiler output) */ + XOR(s.x3, LOAD64(c + *mlen)); + XOR(s.x4, LOAD64(c + *mlen + 8)); + if (NOTZERO(s.x3, s.x4)) { *mlen = 0; return -1; } - return 0; } diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/encrypt.c index b5dc587..641b266 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/encrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/encrypt.c @@ -1,24 +1,25 @@ -#include "core.h" +#include "api.h" +#include "ascon.h" +#include 
"loadstore.h" +#include "permutations.h" +#include "printstate.h" -int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, - const unsigned char* m, unsigned long long mlen, - const unsigned char* ad, unsigned long long adlen, - const unsigned char* nsec, const unsigned char* npub, - const unsigned char* k) { - state s; - u64 tmp0, tmp1; - (void)nsec; +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); - // set ciphertext size +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + /* set ciphertext size */ *clen = mlen + CRYPTO_ABYTES; - + /* ascon encryption */ ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); - - // set tag - tmp0 = from_bit_interleaving(s.x3); - *(u64*)(c + mlen) = U64BIG(tmp0); - tmp1 = from_bit_interleaving(s.x4); - *(u64*)(c + mlen + 8) = U64BIG(tmp1); - + /* set tag */ + STORE64(c + mlen, s.x3); + STORE64(c + mlen + 8, s.x4); return 0; } diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/endian.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/endian.h index b4d18f5..3944360 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/endian.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/endian.h @@ -3,7 +3,10 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) @@ -11,19 +14,26 @@ #elif defined(_MSC_VER) || \ (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - 
((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \ - (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \ - (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \ - (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56)) -#define U32BIG(x) \ - ((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \ - (((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24)) -#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/loadstore.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 
8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t 
LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.c index bc47f5f..1bca2ef 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.c +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.c @@ -1,50 +1,35 @@ #include "permutations.h" -static const u8 constants[][2] = { - {0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, {0x6, 0xc}, {0x3, 0xc}, - {0x6, 0x9}, {0x3, 0x9}, {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; - -// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -u32_2 to_bit_interleaving(u64 in) { - u32 hi = (in) >> 32; - u32 lo = (u32)(in); - u32 r0, r1; - u32_2 out; - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - out.e = (lo & 0x0000FFFF) | (hi << 16); - out.o = (lo >> 16) | (hi & 0xFFFF0000); - return out; -} +#include "round.h" -// Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 -u64 from_bit_interleaving(u32_2 in) { - u32 lo = (in.e & 0x0000FFFF) | (in.o << 16); - u32 hi = (in.e >> 16) | (in.o & 0xFFFF0000); - u32 r0, r1; - u64 out; - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); - out = (u64)hi << 32 | lo; - return out; -} +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM -void P(state *p, u8 rounds) { - state s = *p; - u32_2 t0, t1, t2, t3, t4; - u32 i, start = START_ROUND(rounds); - for (i = start; i < 12; i++) ROUND(constants[i][0], constants[i][1]); - *p = s; +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); } + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.h index bc643ce..ef338f1 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.h +++ 
b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/permutations.h @@ -1,71 +1,189 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -typedef unsigned char u8; -typedef unsigned int u32; -typedef unsigned long long u64; - -typedef struct { - u32 e; - u32 o; -} u32_2; - -typedef struct { - u32_2 x0; - u32_2 x1; - u32_2 x2; - u32_2 x3; - u32_2 x4; -} state; - -#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) -#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) -#define START_ROUND(x) (12 - (x)) - -// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -u32_2 to_bit_interleaving(u64 in); - -// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -u64 from_bit_interleaving(u32_2 in); - -/* clang-format off */ -#define ROUND(C_e, C_o) \ - do { \ - /* round constant */ \ - s.x2.e ^= C_e; s.x2.o ^= C_o; \ - /* s-box layer */ \ - s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \ - s.x4.e ^= s.x3.e; s.x4.o ^= s.x3.o; \ - s.x2.e ^= s.x1.e; s.x2.o ^= s.x1.o; \ - t0.e = s.x0.e; t0.o = s.x0.o; \ - t4.e = s.x4.e; t4.o = s.x4.o; \ - t3.e = s.x3.e; t3.o = s.x3.o; \ - t1.e = s.x1.e; t1.o = s.x1.o; \ - t2.e = s.x2.e; t2.o = s.x2.o; \ - s.x0.e = t0.e ^ (~t1.e & t2.e); s.x0.o = t0.o ^ (~t1.o & t2.o); \ - s.x2.e = t2.e ^ (~t3.e & t4.e); s.x2.o = t2.o ^ (~t3.o & t4.o); \ - s.x4.e = t4.e ^ (~t0.e & t1.e); s.x4.o = t4.o ^ (~t0.o & t1.o); \ - s.x1.e = t1.e ^ (~t2.e & t3.e); s.x1.o = t1.o ^ (~t2.o & t3.o); \ - s.x3.e = t3.e ^ (~t4.e & t0.e); s.x3.o = t3.o ^ (~t4.o & t0.o); \ - s.x1.e ^= s.x0.e; s.x1.o ^= s.x0.o; \ - s.x3.e ^= s.x2.e; s.x3.o ^= s.x2.o; \ - s.x0.e ^= s.x4.e; s.x0.o ^= s.x4.o; \ - /* linear layer */ \ - t0.e = s.x0.e ^ ROTR32(s.x0.o, 4); t0.o = s.x0.o ^ ROTR32(s.x0.e, 5); \ - t1.e = s.x1.e ^ ROTR32(s.x1.e, 11); t1.o = s.x1.o ^ ROTR32(s.x1.o, 11); \ - t2.e = s.x2.e ^ ROTR32(s.x2.o, 2); t2.o = s.x2.o ^ ROTR32(s.x2.e, 3); \ - t3.e = s.x3.e ^ ROTR32(s.x3.o, 3); t3.o = s.x3.o ^ 
ROTR32(s.x3.e, 4); \ - t4.e = s.x4.e ^ ROTR32(s.x4.e, 17); t4.o = s.x4.o ^ ROTR32(s.x4.o, 17); \ - s.x0.e ^= ROTR32(t0.o, 9); s.x0.o ^= ROTR32(t0.e, 10); \ - s.x1.e ^= ROTR32(t1.o, 19); s.x1.o ^= ROTR32(t1.e, 20); \ - s.x2.e ^= t2.o; s.x2.o ^= ROTR32(t2.e, 1); \ - s.x3.e ^= ROTR32(t3.e, 5); s.x3.o ^= ROTR32(t3.o, 5); \ - s.x4.e ^= ROTR32(t4.o, 3); s.x4.o ^= ROTR32(t4.e, 4); \ - s.x2.e = ~s.x2.e; s.x2.o = ~s.x2.o; \ - } while(0) -/* clang-format on */ - -void P(state *p, u8 rounds); - -#endif // PERMUTATIONS_H_ +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 
U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = 
START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/process.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/process.c new file mode 100644 index 
0000000..81fdd15 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/process.c @@ -0,0 +1,31 @@ +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode) { + word_t tmp0; + int n = 0; + while (len) { + /* determine block size */ + n = len < ASCON_RATE ? len : ASCON_RATE; + /* absorb data */ + tmp0 = LOAD(in, n); + XOR(s->x0, tmp0); + /* extract data */ + if (mode != ASCON_AD) STORE(out, s->x0, n); + /* insert data */ + if (mode == ASCON_DEC) { + AND(s->x0, XMASK(n)); + XOR(s->x0, tmp0); + } + /* compute permutation for full blocks */ + if (n == ASCON_RATE) PB(s); + in += n; + out += n; + len -= n; + } + XOR(s->x0, PAD(n % 8)); +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/round.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/round.h new file mode 100644 index 0000000..d8ea3b6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/round.h @@ -0,0 +1,85 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + state_t t; + /* round constant */ + s->x2.e ^= C_e; + s->x2.o ^= C_o; + /* s-box layer */ + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + s->x4.e ^= s->x3.e; + s->x4.o ^= s->x3.o; + s->x2.e ^= s->x1.e; + s->x2.o ^= s->x1.o; + t.x0.e = s->x0.e; + t.x0.o = s->x0.o; + t.x4.e = s->x4.e; + t.x4.o = s->x4.o; + t.x3.e = s->x3.e; + t.x3.o = s->x3.o; + t.x1.e = s->x1.e; + t.x1.o = s->x1.o; + t.x2.e = s->x2.e; + t.x2.o = s->x2.o; + s->x0.e = t.x0.e ^ (~t.x1.e & 
t.x2.e); + s->x0.o = t.x0.o ^ (~t.x1.o & t.x2.o); + s->x2.e = t.x2.e ^ (~t.x3.e & t.x4.e); + s->x2.o = t.x2.o ^ (~t.x3.o & t.x4.o); + s->x4.e = t.x4.e ^ (~t.x0.e & t.x1.e); + s->x4.o = t.x4.o ^ (~t.x0.o & t.x1.o); + s->x1.e = t.x1.e ^ (~t.x2.e & t.x3.e); + s->x1.o = t.x1.o ^ (~t.x2.o & t.x3.o); + s->x3.e = t.x3.e ^ (~t.x4.e & t.x0.e); + s->x3.o = t.x3.o ^ (~t.x4.o & t.x0.o); + s->x1.e ^= s->x0.e; + s->x1.o ^= s->x0.o; + s->x3.e ^= s->x2.e; + s->x3.o ^= s->x2.o; + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + /* linear layer */ + t.x0.e = s->x0.e ^ ROR32(s->x0.o, 4); + t.x0.o = s->x0.o ^ ROR32(s->x0.e, 5); + t.x1.e = s->x1.e ^ ROR32(s->x1.e, 11); + t.x1.o = s->x1.o ^ ROR32(s->x1.o, 11); + t.x2.e = s->x2.e ^ ROR32(s->x2.o, 2); + t.x2.o = s->x2.o ^ ROR32(s->x2.e, 3); + t.x3.e = s->x3.e ^ ROR32(s->x3.o, 3); + t.x3.o = s->x3.o ^ ROR32(s->x3.e, 4); + t.x4.e = s->x4.e ^ ROR32(s->x4.e, 17); + t.x4.o = s->x4.o ^ ROR32(s->x4.o, 17); + s->x0.e ^= ROR32(t.x0.o, 9); + s->x0.o ^= ROR32(t.x0.e, 10); + s->x1.e ^= ROR32(t.x1.o, 19); + s->x1.o ^= ROR32(t.x1.e, 20); + s->x2.e ^= t.x2.o; + s->x2.o ^= ROR32(t.x2.e, 1); + s->x3.e ^= ROR32(t.x3.e, 5); + s->x3.o ^= ROR32(t.x3.o, 5); + s->x4.e ^= ROR32(t.x4.o, 3); + s->x4.o ^= ROR32(t.x4.e, 4); + s->x2.e = ~s->x2.e; + s->x2.o = ~s->x2.o; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/word.c b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/word.c new file mode 100644 index 0000000..0ac4e63 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/word.c @@ -0,0 +1,36 @@ +#include "word.h" + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t TOBI32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t FROMBI32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + return (uint64_t)hi << 32 | lo; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/word.h b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/word.h new file mode 100644 index 0000000..45184ca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi32_lowsize/word.h @@ -0,0 +1,82 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef struct { + uint32_t e; + uint32_t o; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { + return (word_t){.o = 
x >> 32, .e = x}; +} + +__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } + +uint64_t TOBI32(uint64_t in); + +uint64_t FROMBI32(uint64_t in); + +__forceinline word_t U64TOWORD(uint64_t x) { + uint64_t w = TOBI32(x); + return (word_t){.o = w >> 32, .e = w}; +} + +__forceinline uint64_t WORDTOU64(word_t w) { + return FROMBI32((uint64_t)w.o << 32 | w.e); +} + +#define XOR(a, b) \ + do { \ + word_t tb = b; \ + (a).e ^= tb.e; \ + (a).o ^= tb.o; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t tb = b; \ + (a).e &= tb.e; \ + (a).o &= tb.o; \ + } while (0) + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); +} + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return WORD_T((uint64_t)mask << 32 | mask); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi8/api.h b/ascon/Implementations/crypto_aead/ascon128v12/bi8/api.h index a4aa567..96a7c47 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi8/api.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi8/api.h @@ -3,3 +3,4 @@ #define CRYPTO_NPUBBYTES 16 #define CRYPTO_ABYTES 16 #define CRYPTO_NOOVERLAP 1 +#define 
ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon128v12/bi8/ascon.c b/ascon/Implementations/crypto_aead/ascon128v12/bi8/ascon.c index 7bcfadc..801ccf4 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/bi8/ascon.c +++ b/ascon/Implementations/crypto_aead/ascon128v12/bi8/ascon.c @@ -1,15 +1,11 @@ -#include "api.h" +#include -typedef unsigned char u8; -typedef unsigned short u16; -typedef unsigned int u32; -typedef unsigned long long u64; +#include "api.h" -#define RATE (64 / 8) #define PA_ROUNDS 12 #define PB_ROUNDS 6 -#define ROTR8(x, n) (((x) >> (n)) | ((x) << (8 - (n)))) +#define ROR8(x, n) (((x) >> (n)) | ((x) << (8 - (n)))) #define COMPRESS_BYTE_ARRAY_8(a, var_7, var_6, var_5, var_4, var_3, var_2, \ var_1, var_0) \ @@ -269,8 +265,8 @@ typedef unsigned long long u64; a[7] |= t1_7 & 128; \ } while (0) -// This way of implementing Ascon's S-box was inpired by personal communication -// with Joan Daemen about implementing the 3-bit chi layer. +/* This way of implementing Ascon's S-box was inpired by personal communication + with Joan Daemen about implementing the 3-bit chi layer. 
*/ #define ROUND_16(C_7, C_6, C_5, C_4, C_3, C_2, C_1, C_0) \ do { \ /* round constant */ \ @@ -428,22 +424,22 @@ typedef unsigned long long u64; t0_5 = x0_5; \ t0_6 = x0_6; \ t0_7 = x0_7; \ - x0_5 ^= ROTR8(t0_0, 3); \ - x0_6 ^= ROTR8(t0_1, 3); \ - x0_7 ^= ROTR8(t0_2, 3); \ - x0_0 ^= ROTR8(t0_3, 2); \ - x0_1 ^= ROTR8(t0_4, 2); \ - x0_2 ^= ROTR8(t0_5, 2); \ - x0_3 ^= ROTR8(t0_6, 2); \ - x0_4 ^= ROTR8(t0_7, 2); \ - x0_4 ^= ROTR8(t0_0, 4); \ - x0_5 ^= ROTR8(t0_1, 4); \ - x0_6 ^= ROTR8(t0_2, 4); \ - x0_7 ^= ROTR8(t0_3, 4); \ - x0_0 ^= ROTR8(t0_4, 3); \ - x0_1 ^= ROTR8(t0_5, 3); \ - x0_2 ^= ROTR8(t0_6, 3); \ - x0_3 ^= ROTR8(t0_7, 3); \ + x0_5 ^= ROR8(t0_0, 3); \ + x0_6 ^= ROR8(t0_1, 3); \ + x0_7 ^= ROR8(t0_2, 3); \ + x0_0 ^= ROR8(t0_3, 2); \ + x0_1 ^= ROR8(t0_4, 2); \ + x0_2 ^= ROR8(t0_5, 2); \ + x0_3 ^= ROR8(t0_6, 2); \ + x0_4 ^= ROR8(t0_7, 2); \ + x0_4 ^= ROR8(t0_0, 4); \ + x0_5 ^= ROR8(t0_1, 4); \ + x0_6 ^= ROR8(t0_2, 4); \ + x0_7 ^= ROR8(t0_3, 4); \ + x0_0 ^= ROR8(t0_4, 3); \ + x0_1 ^= ROR8(t0_5, 3); \ + x0_2 ^= ROR8(t0_6, 3); \ + x0_3 ^= ROR8(t0_7, 3); \ t0_0 = x1_0; \ t0_1 = x1_1; \ t0_2 = x1_2; \ @@ -457,17 +453,17 @@ typedef unsigned long long u64; x1_5 ^= t0_2; \ x1_6 ^= t0_3; \ x1_7 ^= t0_4; \ - x1_0 ^= ROTR8(t0_5, 7); \ - x1_1 ^= ROTR8(t0_6, 7); \ - x1_2 ^= ROTR8(t0_7, 7); \ - x1_1 ^= ROTR8(t0_0, 5); \ - x1_2 ^= ROTR8(t0_1, 5); \ - x1_3 ^= ROTR8(t0_2, 5); \ - x1_4 ^= ROTR8(t0_3, 5); \ - x1_5 ^= ROTR8(t0_4, 5); \ - x1_6 ^= ROTR8(t0_5, 5); \ - x1_7 ^= ROTR8(t0_6, 5); \ - x1_0 ^= ROTR8(t0_7, 4); \ + x1_0 ^= ROR8(t0_5, 7); \ + x1_1 ^= ROR8(t0_6, 7); \ + x1_2 ^= ROR8(t0_7, 7); \ + x1_1 ^= ROR8(t0_0, 5); \ + x1_2 ^= ROR8(t0_1, 5); \ + x1_3 ^= ROR8(t0_2, 5); \ + x1_4 ^= ROR8(t0_3, 5); \ + x1_5 ^= ROR8(t0_4, 5); \ + x1_6 ^= ROR8(t0_5, 5); \ + x1_7 ^= ROR8(t0_6, 5); \ + x1_0 ^= ROR8(t0_7, 4); \ t0_0 = x2_0; \ t0_1 = x2_1; \ t0_2 = x2_2; \ @@ -476,7 +472,7 @@ typedef unsigned long long u64; t0_5 = x2_5; \ t0_6 = x2_6; \ t0_7 = x2_7; \ - x2_7 ^= ROTR8(t0_0, 1); \ + x2_7 
^= ROR8(t0_0, 1); \ x2_0 ^= t0_1; \ x2_1 ^= t0_2; \ x2_2 ^= t0_3; \ @@ -484,12 +480,12 @@ typedef unsigned long long u64; x2_4 ^= t0_5; \ x2_5 ^= t0_6; \ x2_6 ^= t0_7; \ - x2_2 ^= ROTR8(t0_0, 1); \ - x2_3 ^= ROTR8(t0_1, 1); \ - x2_4 ^= ROTR8(t0_2, 1); \ - x2_5 ^= ROTR8(t0_3, 1); \ - x2_6 ^= ROTR8(t0_4, 1); \ - x2_7 ^= ROTR8(t0_5, 1); \ + x2_2 ^= ROR8(t0_0, 1); \ + x2_3 ^= ROR8(t0_1, 1); \ + x2_4 ^= ROR8(t0_2, 1); \ + x2_5 ^= ROR8(t0_3, 1); \ + x2_6 ^= ROR8(t0_4, 1); \ + x2_7 ^= ROR8(t0_5, 1); \ x2_0 ^= t0_6; \ x2_1 ^= t0_7; \ t0_0 = x3_0; \ @@ -500,22 +496,22 @@ typedef unsigned long long u64; t0_5 = x3_5; \ t0_6 = x3_6; \ t0_7 = x3_7; \ - x3_6 ^= ROTR8(t0_0, 2); \ - x3_7 ^= ROTR8(t0_1, 2); \ - x3_0 ^= ROTR8(t0_2, 1); \ - x3_1 ^= ROTR8(t0_3, 1); \ - x3_2 ^= ROTR8(t0_4, 1); \ - x3_3 ^= ROTR8(t0_5, 1); \ - x3_4 ^= ROTR8(t0_6, 1); \ - x3_5 ^= ROTR8(t0_7, 1); \ - x3_7 ^= ROTR8(t0_0, 3); \ - x3_0 ^= ROTR8(t0_1, 2); \ - x3_1 ^= ROTR8(t0_2, 2); \ - x3_2 ^= ROTR8(t0_3, 2); \ - x3_3 ^= ROTR8(t0_4, 2); \ - x3_4 ^= ROTR8(t0_5, 2); \ - x3_5 ^= ROTR8(t0_6, 2); \ - x3_6 ^= ROTR8(t0_7, 2); \ + x3_6 ^= ROR8(t0_0, 2); \ + x3_7 ^= ROR8(t0_1, 2); \ + x3_0 ^= ROR8(t0_2, 1); \ + x3_1 ^= ROR8(t0_3, 1); \ + x3_2 ^= ROR8(t0_4, 1); \ + x3_3 ^= ROR8(t0_5, 1); \ + x3_4 ^= ROR8(t0_6, 1); \ + x3_5 ^= ROR8(t0_7, 1); \ + x3_7 ^= ROR8(t0_0, 3); \ + x3_0 ^= ROR8(t0_1, 2); \ + x3_1 ^= ROR8(t0_2, 2); \ + x3_2 ^= ROR8(t0_3, 2); \ + x3_3 ^= ROR8(t0_4, 2); \ + x3_4 ^= ROR8(t0_5, 2); \ + x3_5 ^= ROR8(t0_6, 2); \ + x3_6 ^= ROR8(t0_7, 2); \ t0_0 = x4_0; \ t0_1 = x4_1; \ t0_2 = x4_2; \ @@ -524,22 +520,22 @@ typedef unsigned long long u64; t0_5 = x4_5; \ t0_6 = x4_6; \ t0_7 = x4_7; \ - x4_1 ^= ROTR8(t0_0, 1); \ - x4_2 ^= ROTR8(t0_1, 1); \ - x4_3 ^= ROTR8(t0_2, 1); \ - x4_4 ^= ROTR8(t0_3, 1); \ - x4_5 ^= ROTR8(t0_4, 1); \ - x4_6 ^= ROTR8(t0_5, 1); \ - x4_7 ^= ROTR8(t0_6, 1); \ + x4_1 ^= ROR8(t0_0, 1); \ + x4_2 ^= ROR8(t0_1, 1); \ + x4_3 ^= ROR8(t0_2, 1); \ + x4_4 ^= ROR8(t0_3, 1); \ + x4_5 ^= ROR8(t0_4, 1); 
\ + x4_6 ^= ROR8(t0_5, 1); \ + x4_7 ^= ROR8(t0_6, 1); \ x4_0 ^= t0_7; \ - x4_7 ^= ROTR8(t0_0, 6); \ - x4_0 ^= ROTR8(t0_1, 5); \ - x4_1 ^= ROTR8(t0_2, 5); \ - x4_2 ^= ROTR8(t0_3, 5); \ - x4_3 ^= ROTR8(t0_4, 5); \ - x4_4 ^= ROTR8(t0_5, 5); \ - x4_5 ^= ROTR8(t0_6, 5); \ - x4_6 ^= ROTR8(t0_7, 5); \ + x4_7 ^= ROR8(t0_0, 6); \ + x4_0 ^= ROR8(t0_1, 5); \ + x4_1 ^= ROR8(t0_2, 5); \ + x4_2 ^= ROR8(t0_3, 5); \ + x4_3 ^= ROR8(t0_4, 5); \ + x4_4 ^= ROR8(t0_5, 5); \ + x4_5 ^= ROR8(t0_6, 5); \ + x4_6 ^= ROR8(t0_7, 5); \ } while (0) #define P12_8 \ @@ -568,73 +564,72 @@ typedef unsigned long long u64; ROUND_16(0, 1, 0, 0, 1, 0, 1, 1); \ } while (0) -int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, - const unsigned char *m, unsigned long long mlen, - const unsigned char *ad, unsigned long long adlen, - const unsigned char *nsec, const unsigned char *npub, - const unsigned char *k) { - u64 rlen; - u64 i; +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + uint64_t rlen; + uint64_t i; - u8 buffer[8]; + uint8_t buffer[8]; - u8 K0_0; - u8 K1_0; - u8 N0_0; - u8 N1_0; - u8 x0_0, x1_0, x2_0, x3_0, x4_0; - u8 t0_0, t1_0; + uint8_t K0_0; + uint8_t K1_0; + uint8_t N0_0; + uint8_t N1_0; + uint8_t x0_0, x1_0, x2_0, x3_0, x4_0; + uint8_t t0_0, t1_0; - u8 K0_1; - u8 K1_1; - u8 N0_1; - u8 N1_1; - u8 x0_1, x1_1, x2_1, x3_1, x4_1; - u8 t0_1, t1_1; + uint8_t K0_1; + uint8_t K1_1; + uint8_t N0_1; + uint8_t N1_1; + uint8_t x0_1, x1_1, x2_1, x3_1, x4_1; + uint8_t t0_1, t1_1; - u8 K0_2; - u8 K1_2; - u8 N0_2; - u8 N1_2; - u8 x0_2, x1_2, x2_2, x3_2, x4_2; - u8 t0_2, t1_2; + uint8_t K0_2; + uint8_t K1_2; + uint8_t N0_2; + uint8_t N1_2; + uint8_t x0_2, x1_2, x2_2, x3_2, x4_2; + uint8_t t0_2, t1_2; - u8 K0_3; - u8 K1_3; - u8 N0_3; - u8 N1_3; - u8 x0_3, x1_3, x2_3, x3_3, x4_3; - u8 t0_3, t1_3; + uint8_t K0_3; + uint8_t K1_3; + uint8_t N0_3; + uint8_t 
N1_3; + uint8_t x0_3, x1_3, x2_3, x3_3, x4_3; + uint8_t t0_3, t1_3; - u8 K0_4; - u8 K1_4; - u8 N0_4; - u8 N1_4; - u8 x0_4, x1_4, x2_4, x3_4, x4_4; - u8 t0_4, t1_4; + uint8_t K0_4; + uint8_t K1_4; + uint8_t N0_4; + uint8_t N1_4; + uint8_t x0_4, x1_4, x2_4, x3_4, x4_4; + uint8_t t0_4, t1_4; - u8 K0_5; - u8 K1_5; - u8 N0_5; - u8 N1_5; - u8 x0_5, x1_5, x2_5, x3_5, x4_5; - u8 t0_5, t1_5; + uint8_t K0_5; + uint8_t K1_5; + uint8_t N0_5; + uint8_t N1_5; + uint8_t x0_5, x1_5, x2_5, x3_5, x4_5; + uint8_t t0_5, t1_5; - u8 K0_6; - u8 K1_6; - u8 N0_6; - u8 N1_6; - u8 x0_6, x1_6, x2_6, x3_6, x4_6; - u8 t0_6, t1_6; + uint8_t K0_6; + uint8_t K1_6; + uint8_t N0_6; + uint8_t N1_6; + uint8_t x0_6, x1_6, x2_6, x3_6, x4_6; + uint8_t t0_6, t1_6; - u8 K0_7; - u8 K1_7; - u8 N0_7; - u8 N1_7; - u8 x0_7, x1_7, x2_7, x3_7, x4_7; - u8 t0_7, t1_7; + uint8_t K0_7; + uint8_t K1_7; + uint8_t N0_7; + uint8_t N1_7; + uint8_t x0_7, x1_7, x2_7, x3_7, x4_7; + uint8_t t0_7, t1_7; - u8 in_0, in_1, in_2, in_3, in_4, in_5, in_6, in_7; + uint8_t in_0, in_1, in_2, in_3, in_4, in_5, in_6, in_7; (void)nsec; @@ -645,11 +640,11 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, COMPRESS_BYTE_ARRAY_8((npub + 8), N1_7, N1_6, N1_5, N1_4, N1_3, N1_2, N1_1, N1_0); - // initialization - buffer[0] = (u8)(CRYPTO_KEYBYTES * 8); - buffer[1] = (u8)(RATE * 8); - buffer[2] = (u8)PA_ROUNDS; - buffer[3] = (u8)PB_ROUNDS; + /* initialization */ + buffer[0] = (uint8_t)(CRYPTO_KEYBYTES * 8); + buffer[1] = (uint8_t)(ASCON_RATE * 8); + buffer[2] = (uint8_t)PA_ROUNDS; + buffer[3] = (uint8_t)PB_ROUNDS; buffer[4] = 0; buffer[5] = 0; buffer[6] = 0; @@ -705,10 +700,10 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, x4_6 ^= K1_6; x4_7 ^= K1_7; - // process associated data + /* process associated data */ if (adlen) { rlen = adlen; - while (rlen >= RATE) { + while (rlen >= ASCON_RATE) { COMPRESS_BYTE_ARRAY_8(ad, in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; @@ -719,8 
+714,8 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, x0_6 ^= in_6; x0_7 ^= in_7; P6_8; - rlen -= RATE; - ad += RATE; + rlen -= ASCON_RATE; + ad += ASCON_RATE; } for (i = 0; i < rlen; ++i, ++ad) buffer[i] = *ad; buffer[rlen] = 0x80; @@ -739,9 +734,9 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, } x4_0 ^= 1; - // process plaintext + /* process plaintext */ rlen = mlen; - while (rlen >= RATE) { + while (rlen >= ASCON_RATE) { COMPRESS_BYTE_ARRAY_8(m, in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; @@ -753,9 +748,9 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, x0_7 ^= in_7; EXPAND_BYTE_ARRAY_8(c, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0); P6_8; - rlen -= RATE; - m += RATE; - c += RATE; + rlen -= ASCON_RATE; + m += ASCON_RATE; + c += ASCON_RATE; } for (i = 0; i < rlen; ++i, ++m) buffer[i] = *m; buffer[rlen] = 0x80; @@ -772,7 +767,7 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, EXPAND_BYTE_ARRAY_8(buffer, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0); for (i = 0; i < rlen; ++i, ++c) *c = buffer[i]; - // finalization + /* finalization */ x1_0 ^= K0_0; x1_1 ^= K0_1; x1_2 ^= K0_2; @@ -807,7 +802,7 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, x4_6 ^= K1_6; x4_7 ^= K1_7; - // return tag + /* return tag */ EXPAND_BYTE_ARRAY_8(c, x3_7, x3_6, x3_5, x3_4, x3_3, x3_2, x3_1, x3_0); c += 8; EXPAND_BYTE_ARRAY_8(c, x4_7, x4_6, x4_5, x4_4, x4_3, x4_2, x4_1, x4_0); @@ -816,77 +811,75 @@ int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, return 0; } -int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, - unsigned char *nsec, const unsigned char *c, - unsigned long long clen, const unsigned char *ad, - unsigned long long adlen, const unsigned char *npub, - const unsigned char *k) { +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* 
ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { *mlen = 0; if (clen < CRYPTO_ABYTES) return -1; - u64 rlen; - u64 i; + uint64_t rlen; + uint64_t i; - u16 ret_val; - u8 buffer[8]; + uint16_t ret_val; + uint8_t buffer[8]; - u8 K0_0; - u8 K1_0; - u8 N0_0; - u8 N1_0; - u8 x0_0, x1_0, x2_0, x3_0, x4_0; - u8 t0_0, t1_0; + uint8_t K0_0; + uint8_t K1_0; + uint8_t N0_0; + uint8_t N1_0; + uint8_t x0_0, x1_0, x2_0, x3_0, x4_0; + uint8_t t0_0, t1_0; - u8 K0_1; - u8 K1_1; - u8 N0_1; - u8 N1_1; - u8 x0_1, x1_1, x2_1, x3_1, x4_1; - u8 t0_1, t1_1; + uint8_t K0_1; + uint8_t K1_1; + uint8_t N0_1; + uint8_t N1_1; + uint8_t x0_1, x1_1, x2_1, x3_1, x4_1; + uint8_t t0_1, t1_1; - u8 K0_2; - u8 K1_2; - u8 N0_2; - u8 N1_2; - u8 x0_2, x1_2, x2_2, x3_2, x4_2; - u8 t0_2, t1_2; + uint8_t K0_2; + uint8_t K1_2; + uint8_t N0_2; + uint8_t N1_2; + uint8_t x0_2, x1_2, x2_2, x3_2, x4_2; + uint8_t t0_2, t1_2; - u8 K0_3; - u8 K1_3; - u8 N0_3; - u8 N1_3; - u8 x0_3, x1_3, x2_3, x3_3, x4_3; - u8 t0_3, t1_3; + uint8_t K0_3; + uint8_t K1_3; + uint8_t N0_3; + uint8_t N1_3; + uint8_t x0_3, x1_3, x2_3, x3_3, x4_3; + uint8_t t0_3, t1_3; - u8 K0_4; - u8 K1_4; - u8 N0_4; - u8 N1_4; - u8 x0_4, x1_4, x2_4, x3_4, x4_4; - u8 t0_4, t1_4; + uint8_t K0_4; + uint8_t K1_4; + uint8_t N0_4; + uint8_t N1_4; + uint8_t x0_4, x1_4, x2_4, x3_4, x4_4; + uint8_t t0_4, t1_4; - u8 K0_5; - u8 K1_5; - u8 N0_5; - u8 N1_5; - u8 x0_5, x1_5, x2_5, x3_5, x4_5; - u8 t0_5, t1_5; + uint8_t K0_5; + uint8_t K1_5; + uint8_t N0_5; + uint8_t N1_5; + uint8_t x0_5, x1_5, x2_5, x3_5, x4_5; + uint8_t t0_5, t1_5; - u8 K0_6; - u8 K1_6; - u8 N0_6; - u8 N1_6; - u8 x0_6, x1_6, x2_6, x3_6, x4_6; - u8 t0_6, t1_6; + uint8_t K0_6; + uint8_t K1_6; + uint8_t N0_6; + uint8_t N1_6; + uint8_t x0_6, x1_6, x2_6, x3_6, x4_6; + uint8_t t0_6, t1_6; - u8 K0_7; - u8 K1_7; - u8 N0_7; - u8 N1_7; - u8 x0_7, x1_7, x2_7, x3_7, x4_7; - u8 t0_7, t1_7; + uint8_t K0_7; + uint8_t K1_7; + uint8_t N0_7; + uint8_t N1_7; + uint8_t x0_7, x1_7, x2_7, x3_7, x4_7; + uint8_t 
t0_7, t1_7; - u8 in_0, in_1, in_2, in_3, in_4, in_5, in_6, in_7; + uint8_t in_0, in_1, in_2, in_3, in_4, in_5, in_6, in_7; (void)nsec; @@ -897,11 +890,11 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, COMPRESS_BYTE_ARRAY_8((npub + 8), N1_7, N1_6, N1_5, N1_4, N1_3, N1_2, N1_1, N1_0); - // initialization - buffer[0] = (u8)(CRYPTO_KEYBYTES * 8); - buffer[1] = (u8)(RATE * 8); - buffer[2] = (u8)PA_ROUNDS; - buffer[3] = (u8)PB_ROUNDS; + /* initialization */ + buffer[0] = (uint8_t)(CRYPTO_KEYBYTES * 8); + buffer[1] = (uint8_t)(ASCON_RATE * 8); + buffer[2] = (uint8_t)PA_ROUNDS; + buffer[3] = (uint8_t)PB_ROUNDS; buffer[4] = 0; buffer[5] = 0; buffer[6] = 0; @@ -957,10 +950,10 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, x4_6 ^= K1_6; x4_7 ^= K1_7; - // process associated data + /* process associated data */ if (adlen) { rlen = adlen; - while (rlen >= RATE) { + while (rlen >= ASCON_RATE) { COMPRESS_BYTE_ARRAY_8(ad, in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; @@ -971,8 +964,8 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, x0_6 ^= in_6; x0_7 ^= in_7; P6_8; - rlen -= RATE; - ad += RATE; + rlen -= ASCON_RATE; + ad += ASCON_RATE; } for (i = 0; i < rlen; ++i, ++ad) buffer[i] = *ad; buffer[rlen] = 0x80; @@ -991,16 +984,16 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, } x4_0 ^= 1; - // process ciphertext + /* process ciphertext */ rlen = clen - CRYPTO_KEYBYTES; - while (rlen >= RATE) { + while (rlen >= ASCON_RATE) { EXPAND_BYTE_ARRAY_8(m, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0); for (i = 0; i < 8; ++i) m[i] ^= c[i]; COMPRESS_BYTE_ARRAY_8(c, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0); P6_8; - rlen -= RATE; - m += RATE; - c += RATE; + rlen -= ASCON_RATE; + m += ASCON_RATE; + c += ASCON_RATE; } EXPAND_BYTE_ARRAY_8(buffer, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0); for (i = 0; i < rlen; ++i, ++m, ++c) { @@ -1011,7 +1004,7 @@ int 
crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, COMPRESS_BYTE_ARRAY_8(buffer, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0); - // finalization + /* finalization */ x1_0 ^= K0_0; x1_1 ^= K0_1; x1_2 ^= K0_2; @@ -1046,7 +1039,7 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, x4_6 ^= K1_6; x4_7 ^= K1_7; - // return -1 if verification fails + /* return -1 if verification fails */ ret_val = 0; EXPAND_BYTE_ARRAY_8(buffer, x3_7, x3_6, x3_5, x3_4, x3_3, x3_2, x3_1, x3_0); @@ -1058,8 +1051,7 @@ int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, if (ret_val != 0) return -1; - // return plaintext + /* return plaintext */ *mlen = clen - CRYPTO_ABYTES; return 0; } - diff --git a/ascon/Implementations/crypto_aead/ascon128v12/neon/api.h b/ascon/Implementations/crypto_aead/ascon128v12/neon/api.h new file mode 100644 index 0000000..96a7c47 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/neon/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon128v12/neon/decrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/neon/decrypt.c new file mode 100644 index 0000000..5d6fb83 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/neon/decrypt.c @@ -0,0 +1,75 @@ +#include "api.h" +#include "endian.h" +#include "permutations.h" + +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define IV \ + ((uint64_t)(8 * (CRYPTO_KEYBYTES)) << 56 | \ + (uint64_t)(8 * (ASCON_RATE)) << 48 | (uint64_t)(PA_ROUNDS) << 40 | \ + (uint64_t)(PB_ROUNDS) << 32) + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + + const uint64_t K0 = U64BIG(*(uint64_t*)k); + const 
uint64_t K1 = U64BIG(*(uint64_t*)(k + 8)); + const uint64_t N0 = U64BIG(*(uint64_t*)npub); + const uint64_t N1 = U64BIG(*(uint64_t*)(npub + 8)); + state_t s; + uint32_t i; + (void)nsec; + + /* set plaintext size */ + *mlen = clen - CRYPTO_ABYTES; + + /* initialization */ + s.x0 = IV; + s.x1 = K0; + s.x2 = K1; + s.x3 = N0; + s.x4 = N1; + P12(); + s.x3 ^= K0; + s.x4 ^= K1; + + /* process associated data */ + if (adlen) { + AD(); + for (i = 0; i < adlen; ++i, ++ad) s.x0 ^= SETBYTE(*ad, i); + s.x0 ^= SETBYTE(0x80, adlen); + P6(); + } + s.x4 ^= 1; + + /* process plaintext */ + clen -= CRYPTO_ABYTES; + CT(); + for (i = 0; i < clen; ++i, ++m, ++c) { + *m = GETBYTE(s.x0, i) ^ *c; + s.x0 &= ~SETBYTE(0xff, i); + s.x0 |= SETBYTE(*c, i); + } + s.x0 ^= SETBYTE(0x80, clen); + + /* finalization */ + s.x1 ^= K0; + s.x2 ^= K1; + P12(); + s.x3 ^= K0; + s.x4 ^= K1; + + /* verify tag (should be constant time, check compiler output) */ + if (((s.x3 ^ U64BIG(*(uint64_t*)c)) | (s.x4 ^ U64BIG(*(uint64_t*)(c + 8)))) != + 0) { + *mlen = 0; + return -1; + } + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/neon/encrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/neon/encrypt.c new file mode 100644 index 0000000..52bec6d --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/neon/encrypt.c @@ -0,0 +1,66 @@ +#include "api.h" +#include "endian.h" +#include "permutations.h" + +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define IV \ + ((uint64_t)(8 * (CRYPTO_KEYBYTES)) << 56 | \ + (uint64_t)(8 * (ASCON_RATE)) << 48 | (uint64_t)(PA_ROUNDS) << 40 | \ + (uint64_t)(PB_ROUNDS) << 32) + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + const uint64_t K0 = U64BIG(*(uint64_t*)k); + const uint64_t K1 = U64BIG(*(uint64_t*)(k + 8)); + const uint64_t N0 = U64BIG(*(uint64_t*)npub); + const uint64_t N1 = U64BIG(*(uint64_t*)(npub 
+ 8)); + state_t s; + uint32_t i; + (void)nsec; + + /* set ciphertext size */ + *clen = mlen + CRYPTO_ABYTES; + + /* initialization */ + s.x0 = IV; + s.x1 = K0; + s.x2 = K1; + s.x3 = N0; + s.x4 = N1; + P12(); + s.x3 ^= K0; + s.x4 ^= K1; + + /* process associated data */ + if (adlen) { + AD(); + for (i = 0; i < adlen; ++i, ++ad) s.x0 ^= SETBYTE(*ad, i); + s.x0 ^= SETBYTE(0x80, adlen); + P6(); + } + s.x4 ^= 1; + + /* process plaintext */ + PT(); + for (i = 0; i < mlen; ++i, ++m, ++c) { + s.x0 ^= SETBYTE(*m, i); + *c = GETBYTE(s.x0, i); + } + s.x0 ^= SETBYTE(0x80, mlen); + + /* finalization */ + s.x1 ^= K0; + s.x2 ^= K1; + P12(); + s.x3 ^= K0; + s.x4 ^= K1; + + /* set tag */ + *(uint64_t*)c = U64BIG(s.x3); + *(uint64_t*)(c + 8) = U64BIG(s.x4); + + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/neon/endian.h b/ascon/Implementations/crypto_aead/ascon128v12/neon/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/neon/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | 
((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/neon/implementors b/ascon/Implementations/crypto_aead/ascon128v12/neon/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/neon/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128v12/neon/loadstore.h b/ascon/Implementations/crypto_aead/ascon128v12/neon/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/neon/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma 
message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/neon/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/neon/permutations.h new file mode 100644 index 0000000..f627d7b --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/neon/permutations.h @@ -0,0 +1,231 @@ +#ifndef 
PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +typedef struct { + uint64_t x0, x1, x2, x3, x4; +} state_t; + +static const uint64_t C[12] = { + 0xffffffffffffff0full, 0xffffffffffffff1eull, 0xffffffffffffff2dull, + 0xffffffffffffff3cull, 0xffffffffffffff4bull, 0xffffffffffffff5aull, + 0xffffffffffffff69ull, 0xffffffffffffff78ull, 0xffffffffffffff87ull, + 0xffffffffffffff96ull, 0xffffffffffffffa5ull, 0xffffffffffffffb4ull, +}; + +/* clang-format off */ +#define ROUND(OFFSET) \ + "vldr d31, [%[C], #" #OFFSET "] \n\t" \ + "veor d0, d0, d4 \n\t" \ + "veor d4, d4, d3 \n\t" \ + "veor d2, d2, d31 \n\t" \ + "vbic d13, d0, d4 \n\t" \ + "vbic d12, d4, d3 \n\t" \ + "veor d2, d2, d1 \n\t" \ + "vbic d14, d1, d0 \n\t" \ + "vbic d11, d3, d2 \n\t" \ + "vbic d10, d2, d1 \n\t" \ + "veor q0, q0, q5 \n\t" \ + "veor q1, q1, q6 \n\t" \ + "veor d4, d4, d14 \n\t" \ + "veor d1, d1, d0 \n\t" \ + "veor d3, d3, d2 \n\t" \ + "veor d0, d0, d4 \n\t" \ + "vsri.64 d14, d4, #7 \n\t" \ + "vsri.64 d24, d4, #41 \n\t" \ + "vsri.64 d11, d1, #39 \n\t" \ + "vsri.64 d21, d1, #61 \n\t" \ + "vsri.64 d10, d0, #19 \n\t" \ + "vsri.64 d20, d0, #28 \n\t" \ + "vsri.64 d12, d2, #1 \n\t" \ + "vsri.64 d22, d2, #6 \n\t" \ + "vsri.64 d13, d3, #10 \n\t" \ + "vsri.64 d23, d3, #17 \n\t" \ + "vsli.64 d10, d0, #45 \n\t" \ + "vsli.64 d20, d0, #36 \n\t" \ + "vsli.64 d11, d1, #25 \n\t" \ + "vsli.64 d21, d1, #3 \n\t" \ + "vsli.64 d12, d2, #63 \n\t" \ + "vsli.64 d22, d2, #58 \n\t" \ + "vsli.64 d13, d3, #54 \n\t" \ + "vsli.64 d23, d3, #47 \n\t" \ + "vsli.64 d14, d4, #57 \n\t" \ + "vsli.64 d24, d4, #23 \n\t" \ + "veor q5, q5, q0 \n\t" \ + "veor q6, q6, q1 \n\t" \ + "veor d14, d14, d4 \n\t" \ + "veor q0, q5, q10 \n\t" \ + "veor d4, d14, d24 \n\t" \ + "veor q1, q6, q11 \n\t" +/* clang-format on */ + +#define P12() \ + __asm__ __volatile__ ( \ + ".arm \n\t" \ + ".fpu neon \n\t" \ + "vldm %[s], {d0-d4} \n\t" \ + "vmvn d2, d2 \n\t" \ + ROUND(0) \ + ROUND(8) \ + ROUND(16) \ + ROUND(24) \ + ROUND(32) \ + ROUND(40) \ + ROUND(48) \ + 
ROUND(56) \ + ROUND(64) \ + ROUND(72) \ + ROUND(80) \ + ROUND(88) \ + "vmvn d2, d2 \n\t" \ + "vstm %[s], {d0-d4} \n\t" \ + :: [s] "r" (&s), [C] "r" (C) \ + : "d0", "d1", "d2", "d3", "d4", \ + "d10", "d11", "d12", "d13", "d14", \ + "d20", "d21", "d22", "d23", "d24", \ + "d31", "memory") + +#define P6() \ + __asm__ __volatile__ ( \ + ".arm \n\t" \ + ".fpu neon \n\t" \ + "vldm %[s], {d0-d4} \n\t" \ + "vmvn d2, d2 \n\t" \ + ROUND(48) \ + ROUND(56) \ + ROUND(64) \ + ROUND(72) \ + ROUND(80) \ + ROUND(88) \ + "vmvn d2, d2 \n\t" \ + "vstm %[s], {d0-d4} \n\t" \ + :: [s] "r" (&s), [C] "r" (C) \ + : "d0", "d1", "d2", "d3", "d4", \ + "d10", "d11", "d12", "d13", "d14", \ + "d20", "d21", "d22", "d23", "d24", \ + "d31", "memory") + +#define AD() \ + do { \ + uint32_t adlen_hi = (uint32_t)(adlen >> 32); \ + uint32_t adlen_lo = (uint32_t)adlen; \ + __asm__ __volatile__ ( \ + ".arm \n\t" \ + ".fpu neon \n\t" \ + "cmp %[adlen_hi], #0 \n\t" \ + "cmpeq %[adlen_lo], #7 \n\t" \ + "bls .LAD1 \n\t" \ + "vldm %[s], {d0-d4} \n\t" \ + ".LAD0: \n\t" \ + "vldm %[ad]!, {d16} \n\t" \ + "vrev64.8 d16, d16 \n\t" \ + "veor d0, d0, d16 \n\t" \ + "vmvn d2, d2 \n\t" \ + ROUND(48) \ + ROUND(56) \ + ROUND(64) \ + ROUND(72) \ + ROUND(80) \ + ROUND(88) \ + "vmvn d2, d2 \n\t" \ + "sub %[adlen_lo], %[adlen_lo], #8 \n\t" \ + "sbc %[adlen_hi], %[adlen_hi], #0 \n\t" \ + "cmp %[adlen_hi], #0 \n\t" \ + "cmpeq %[adlen_lo], #7 \n\t" \ + "bhi .LAD0 \n\t" \ + "vstm %[s], {d0-d4} \n\t" \ + ".LAD1: \n\t" \ + : [adlen_hi] "+r" (adlen_hi), [adlen_lo] "+r" (adlen_lo), \ + [ad] "+r" (ad) \ + : [s] "r" (&s), [C] "r" (C) \ + : "d0", "d1", "d2", "d3", "d4", \ + "d10", "d11", "d12", "d13", "d14", "d16", \ + "d20", "d21", "d22", "d23", "d24", \ + "d31", "memory"); \ + adlen = (uint64_t)adlen_hi << 32 | adlen_lo; \ + } while (0) + +#define PT() \ + do { \ + uint32_t mlen_hi = (uint32_t)(mlen >> 32); \ + uint32_t mlen_lo = (uint32_t)mlen; \ + __asm__ __volatile__ ( \ + ".arm \n\t" \ + ".fpu neon \n\t" \ + "cmp %[mlen_hi], #0 
\n\t" \ + "cmpeq %[mlen_lo], #7 \n\t" \ + "bls .LPT1 \n\t" \ + "vldm %[s], {d0-d4} \n\t" \ + ".LPT0: \n\t" \ + "vldm %[m]!, {d16} \n\t" \ + "vrev64.8 d16, d16 \n\t" \ + "veor d0, d0, d16 \n\t" \ + "vrev64.8 d26, d0 \n\t" \ + "vstm %[c]!, {d26} \n\t" \ + "vmvn d2, d2 \n\t" \ + ROUND(48) \ + ROUND(56) \ + ROUND(64) \ + ROUND(72) \ + ROUND(80) \ + ROUND(88) \ + "vmvn d2, d2 \n\t" \ + "sub %[mlen_lo], %[mlen_lo], #8 \n\t" \ + "sbc %[mlen_hi], %[mlen_hi], #0 \n\t" \ + "cmp %[mlen_hi], #0 \n\t" \ + "cmpeq %[mlen_lo], #7 \n\t" \ + "bhi .LPT0 \n\t" \ + "vstm %[s], {d0-d4} \n\t" \ + ".LPT1: \n\t" \ + : [mlen_hi] "+r" (mlen_hi), [mlen_lo] "+r" (mlen_lo), \ + [m] "+r" (m), [c] "+r" (c) \ + : [s] "r" (&s), [C] "r" (C) \ + : "d0", "d1", "d2", "d3", "d4", \ + "d10", "d11", "d12", "d13", "d14", "d16", \ + "d20", "d21", "d22", "d23", "d24", "d26", \ + "d31", "memory"); \ + mlen = (uint64_t)mlen_hi << 32 | mlen_lo; \ + } while (0) + +#define CT() \ + do { \ + uint32_t clen_hi = (uint32_t)(clen >> 32); \ + uint32_t clen_lo = (uint32_t)clen; \ + __asm__ __volatile__ ( \ + ".arm \n\t" \ + ".fpu neon \n\t" \ + "cmp %[clen_hi], #0 \n\t" \ + "cmpeq %[clen_lo], #7 \n\t" \ + "bls .LCT1 \n\t" \ + "vldm %[s], {d0-d4} \n\t" \ + ".LCT0: \n\t" \ + "vldm %[c]!, {d26} \n\t" \ + "vrev64.8 d16, d0 \n\t" \ + "veor d16, d16, d26 \n\t" \ + "vrev64.8 d0, d26 \n\t" \ + "vstm %[m]!, {d16} \n\t" \ + "vmvn d2, d2 \n\t" \ + ROUND(48) \ + ROUND(56) \ + ROUND(64) \ + ROUND(72) \ + ROUND(80) \ + ROUND(88) \ + "vmvn d2, d2 \n\t" \ + "sub %[clen_lo], %[clen_lo], #8 \n\t" \ + "sbc %[clen_hi], %[clen_hi], #0 \n\t" \ + "cmp %[clen_hi], #0 \n\t" \ + "cmpeq %[clen_lo], #7 \n\t" \ + "bhi .LCT0 \n\t" \ + "vstm %[s], {d0-d4} \n\t" \ + ".LCT1: \n\t" \ + : [clen_hi] "+r" (clen_hi), [clen_lo] "+r" (clen_lo), \ + [m] "+r" (m), [c] "+r" (c) \ + : [s] "r" (&s), [C] "r" (C) \ + : "d0", "d1", "d2", "d3", "d4", \ + "d10", "d11", "d12", "d13", "d14", "d16", \ + "d20", "d21", "d22", "d23", "d24", "d26", \ + "d31", "memory"); \ + 
clen = (uint64_t)clen_hi << 32 | clen_lo; \ + } while (0) + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64/api.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64/api.h index a4aa567..96a7c47 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64/api.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64/api.h @@ -3,3 +3,4 @@ #define CRYPTO_NPUBBYTES 16 #define CRYPTO_ABYTES 16 #define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64/ascon.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64/ascon.c new file mode 100644 index 0000000..9011a77 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64/ascon.c @@ -0,0 +1,250 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + XOR(*K1, LOAD64(k)); + XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t N0, N1; + word_t K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* initialization */ + PINIT(s); + XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + XOR(s->x1, K1); + XOR(s->x2, K2); + XOR(s->x3, N0); + XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final 
associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) XOR(*px, LOAD(ad, adlen)); + XOR(*px, PAD(adlen)); + PB(s); + } + XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + XOR(*px, cx); + STORE(m, *px, clen); + AND(*px, XMASK(clen)); + XOR(*px, cx); + } + XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && 
ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} + +#if !ASCON_INLINE_MODE + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + init(s, npub, k); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { final(s, k); } + +#else + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, k); + /* set tag */ + c += mlen; + STORE64(c, s.x3); + STORE64(c + 8, s.x4); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + 
DECRYPT(&s, m, c, clen); + FINAL(&s, k); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + XOR(s.x3, LOAD64(c)); + XOR(s.x4, LOAD64(c + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64/ascon.h new file mode 100644 index 0000000..10a5b6e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64/config.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64/config.h new file mode 100644 index 0000000..8d8a1a0 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 1 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'H' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define 
__forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64/endian.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64/endian.h index ffbdcd9..3944360 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64/endian.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64/endian.h @@ -3,29 +3,37 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) #elif defined(_MSC_VER) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ - (((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ - (((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ - (((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) #define U32BIG(x) \ - ((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ - (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) -#define U16BIG(x) \ - ((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8)) + (((0x000000FF & (x)) << 24) | 
((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ - +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64/loadstore.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, 
bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64/permutations.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64/permutations.c new file mode 100644 index 0000000..3aecaa6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64/permutations.c @@ -0,0 +1,26 @@ +#include "permutations.h" + +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + 
+#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ +/* Out-of-line entry points: each one only forwards to the ROUNDS helper of the same name. */ +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 /* P8 is compiled only when CRYPTO_ABYTES is defined and ASCON_RATE is 16 */ +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 /* P6 is compiled only when CRYPTO_ABYTES is defined and ASCON_RATE is 8 */ +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif /* ASCON_INLINE_PERM / ASCON_SINGLE_PERM */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64/permutations.h index 4af1e2c..6172dd5 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64/permutations.h @@ -1,102 +1,184 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -typedef unsigned char u8; -typedef unsigned long long u64; - -typedef struct { - u64 x0, x1, x2, x3, x4; -} state; - -#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) -#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) - -#define ROUND(C) \ - do { \ - state t; \ - s.x2 ^= C; \ - s.x0 ^= s.x4; \ - s.x4 ^= s.x3; \ - s.x2 ^= s.x1; \ - t.x0 = s.x0; \ - t.x4 = s.x4; \ - t.x3 = s.x3; \ - t.x1 = s.x1; \ - t.x2 = s.x2; \ - s.x0 = t.x0 ^ ((~t.x1) & t.x2); \ - s.x2 = t.x2 ^ ((~t.x3) & t.x4); \ - s.x4 = t.x4 ^ ((~t.x0) & t.x1); \ - s.x1 = t.x1 ^ ((~t.x2) & t.x3); \ - s.x3 = t.x3 ^ ((~t.x4) & t.x0); \ - s.x1 ^= s.x0; \ - t.x1 = s.x1; \ - s.x1 = ROTR64(s.x1, 39); \ - s.x3 ^= s.x2; \ - t.x2 = s.x2; \ - s.x2 = ROTR64(s.x2, 1); \ - t.x4 = s.x4; \ - t.x2 ^= s.x2; \ - s.x2 = ROTR64(s.x2, 6 - 1); \ - t.x3 = s.x3; \ - t.x1 ^= s.x1; \ - s.x3 = ROTR64(s.x3, 10); \ - s.x0 ^= s.x4; \ - s.x4 = ROTR64(s.x4, 7); \ - t.x3 ^= s.x3; \ - s.x2 ^= t.x2; \ - s.x1 = ROTR64(s.x1, 61 - 39); \ - t.x0 = s.x0; \ - s.x2 = ~s.x2; \ - s.x3 = ROTR64(s.x3, 17 - 10); \ - t.x4 ^= s.x4; \ - s.x4 = ROTR64(s.x4, 41 - 7); \ - s.x3 ^= t.x3; \ - s.x1 ^= t.x1; \ - s.x0 = ROTR64(s.x0, 19); \ - s.x4 ^= t.x4; \ - t.x0 ^= s.x0; \ - s.x0 = ROTR64(s.x0, 28 - 19); \ - s.x0 
^= t.x0; \ - } while (0) - -#define P12() \ - do { \ - ROUND(0xf0); \ - ROUND(0xe1); \ - ROUND(0xd2); \ - ROUND(0xc3); \ - ROUND(0xb4); \ - ROUND(0xa5); \ - ROUND(0x96); \ - ROUND(0x87); \ - ROUND(0x78); \ - ROUND(0x69); \ - ROUND(0x5a); \ - ROUND(0x4b); \ - } while (0) - -#define P8() \ - do { \ - ROUND(0xb4); \ - ROUND(0xa5); \ - ROUND(0x96); \ - ROUND(0x87); \ - ROUND(0x78); \ - ROUND(0x69); \ - ROUND(0x5a); \ - ROUND(0x4b); \ - } while (0) - -#define P6() \ - do { \ - ROUND(0x96); \ - ROUND(0x87); \ - ROUND(0x78); \ - ROUND(0x69); \ - ROUND(0x5a); \ - ROUND(0x4b); \ - } while (0) - -#endif // PERMUTATIONS_H_ +#include +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" +/* Per-variant parameters; the IV macros below multiply the KEYBYTES, RATE and HASH_BYTES values by 8 before packing them. */ +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 +/* IV layout: key size * 8 at bit 56, rate * 8 at bit 48, PA round count at bit 40, PB round count at bit 32. */ +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) +/* Same layout with the 128a rate and PB round count; the key field uses ASCON_128_KEYBYTES, which equals ASCON_128A_KEYBYTES (both 16). */ +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) +/* 20-byte-key variant; rate and round counts are the ASCON_128 ones. */ +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) +/* No key or PB field; ASCON_HASH_BYTES * 8 goes in the low bits. */ +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) +/* Same as ASCON_HASH_IV without the ASCON_HASH_BYTES term. */ +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) +/* NOTE(review): presumably the precomputed five-word initial states for hash and XOF; not verifiable from this header alone. */ +#define ASCON_HASH_IV0 
U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) +/* Pick IV, round counts and the PB permutation from ASCON_RATE and CRYPTO_KEYBYTES. */ +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif +/* Rate 16 selects the 128a parameters without checking CRYPTO_KEYBYTES. */ +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif +/* First round constant of an n-round permutation: START(12) is 0xf0, START(8) is 0xb4, START(6) is 0x96. */ +#define START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS +/* Unrolled variants: round constants are listed explicitly and all end at 0x4b. */ +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ +/* Looped variants: start at START(n) and subtract 0x0f per round while the constant is above 0x4a, giving the same constant sequences as the unrolled lists. */ +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif /* ASCON_UNROLL_LOOPS */ + 
+#if ASCON_INLINE_PERM +/* Inline mode: P12, P8 and P6 trace the input state with printstate, then run the rounds. */ +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} +/* Dispatch on the round count; any value other than 12, 8 or 6 does nothing. */ +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM +/* Single-function mode: P12, P8 and P6 expand to calls of one out-of-line P(s, rounds). */ +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ +/* Out-of-line mode: P12, P8 and P6 are only declared here; P remains an inline dispatcher over them. */ +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif /* ASCON_INLINE_PERM / ASCON_SINGLE_PERM */ + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG +/* With NDEBUG defined both tracing helpers expand to nothing. */ +#define printword(text, w) +#define printstate(text, s) + +#else +/* Otherwise they print with printf. */ +#include +#include + +#include "ascon.h" +/* Prints text, an equals sign, x as 16 zero-padded hex digits, and a newline. */ +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} +/* Prints text and a colon, then the state words x0 to x4 on one line each. */ +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif /* NDEBUG */ + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64/round.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64/round.h new file mode 100644 index 0000000..077cbfd --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64/round.h @@ -0,0 +1,69 @@ +#ifndef 
ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" +/* Zero the three key words. */ +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} +/* Zero the state words x0 to x4. */ +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} +/* One permutation round on *s with round constant C. The constant is XORed into x2, three pre-mixing XORs follow, and t then holds a copy of the state for the nonlinear step. After the post-mixing XORs (x1 ^= x0, x3 ^= x2, x0 ^= x4) each word w is replaced by w ^ ROR64(w, a) ^ ROR64(w, b) with (a, b) = (19, 28), (39, 61), (1, 6), (10, 17), (7, 41) for x0 to x4; the second rotation is applied incrementally to the already rotated value (for example 61 - 39). The statements of the different words are interleaved, so their order matters. */ +__forceinline void ROUND(state_t* s, uint64_t C) { + state_t t; + s->x2 ^= C; /* round constant */ + s->x0 ^= s->x4; + s->x4 ^= s->x3; + s->x2 ^= s->x1; + t.x0 = s->x0; /* t snapshots all five words for the nonlinear step */ + t.x4 = s->x4; + t.x3 = s->x3; + t.x1 = s->x1; + t.x2 = s->x2; + s->x0 = t.x0 ^ (~t.x1 & t.x2); /* x[i] = t[i] ^ (~t[i+1] & t[i+2]), indices mod 5 */ + s->x2 = t.x2 ^ (~t.x3 & t.x4); + s->x4 = t.x4 ^ (~t.x0 & t.x1); + s->x1 = t.x1 ^ (~t.x2 & t.x3); + s->x3 = t.x3 ^ (~t.x4 & t.x0); + s->x1 ^= s->x0; /* from here on t is reused to accumulate w ^ ROR64(w, a) per word */ + t.x1 = s->x1; + s->x1 = ROR64(s->x1, 39); + s->x3 ^= s->x2; + t.x2 = s->x2; + s->x2 = ROR64(s->x2, 1); + t.x4 = s->x4; + t.x2 ^= s->x2; + s->x2 = ROR64(s->x2, 6 - 1); + t.x3 = s->x3; + t.x1 ^= s->x1; + s->x3 = ROR64(s->x3, 10); + s->x0 ^= s->x4; + s->x4 = ROR64(s->x4, 7); + t.x3 ^= s->x3; + s->x2 ^= t.x2; + s->x1 = ROR64(s->x1, 61 - 39); + t.x0 = s->x0; + s->x2 = ~s->x2; /* x2 is the only word that gets complemented */ + s->x3 = ROR64(s->x3, 17 - 10); + t.x4 ^= s->x4; + s->x4 = ROR64(s->x4, 41 - 7); + s->x3 ^= t.x3; + s->x1 ^= t.x1; + s->x0 = ROR64(s->x0, 19); + s->x4 ^= t.x4; + t.x0 ^= s->x0; + s->x0 = ROR64(s->x0, 28 - 19); + s->x0 ^= t.x0; + printstate(" round output", s); /* compiled out when NDEBUG is defined */ +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64/word.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64/word.h new file mode 100644 index 0000000..5d601bb --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64/word.h @@ -0,0 +1,47 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef uint64_t word_t; +/* word_t is a plain uint64_t, so these conversion macros expand to nothing and WORD_T(x) is just (x). */ +#define WORD_T +#define UINT64_T + +#define U64TOWORD +#define WORDTOU64 +/* Statement-style wrappers for the ^= and &= operators. */ +#define XOR(a, b) \ + do { \ + (a) ^= (b); \ + } while (0) + +#define AND(a, b) \ + do { \ + (a) &= (b); \ + } while (0) + 
+__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + return lo2hi << 32 | hi2lo >> 32; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/api.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/api.h new file mode 100644 index 0000000..96a7c47 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/ascon.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/ascon.c new file mode 100644 index 0000000..9011a77 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/ascon.c @@ -0,0 +1,250 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + XOR(*K1, LOAD64(k)); + XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t N0, N1; + word_t K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load 
key */ + loadkey(&K0, &K1, &K2, k); + /* initialization */ + PINIT(s); + XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + XOR(s->x1, K1); + XOR(s->x2, K2); + XOR(s->x3, N0); + XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) XOR(*px, LOAD(ad, adlen)); + XOR(*px, PAD(adlen)); + PB(s); + } + XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + XOR(s->x1, cx); + STORE64(m + 8, s->x1); + 
s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + XOR(*px, cx); + STORE(m, *px, clen); + AND(*px, XMASK(clen)); + XOR(*px, cx); + } + XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} + +#if !ASCON_INLINE_MODE + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + init(s, npub, k); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { final(s, k); } + +#else + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { 
+ state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, k); + /* set tag */ + c += mlen; + STORE64(c, s.x3); + STORE64(c + 8, s.x4); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, k); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + XOR(s.x3, LOAD64(c)); + XOR(s.x4, LOAD64(c + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/ascon.h new file mode 100644 index 0000000..d11fa01 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/ascon.h @@ -0,0 +1,20 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; + word_t rx; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/config.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/config.h new file mode 100644 index 0000000..87b8b74 --- /dev/null +++ 
b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/config.h @@ -0,0 +1,44 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 1 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'H' +#endif + +/* Ascon mask generator { 'S'tdlib, 'X'orshift, 'R'andombytes } */ +#ifndef ASCON_MASK_RNG +#define ASCON_MASK_RNG 'X' +#endif + +/* mask key/data loads */ +#ifndef ASCON_MASK_LOADS +#define ASCON_MASK_LOADS 0 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/endian.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & 
(x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/implementors b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/loadstore.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = 
U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon 
data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/permutations.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/permutations.c new file mode 100644 index 0000000..3aecaa6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/permutations.c @@ -0,0 +1,26 @@ +#include "permutations.h" + +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/permutations.h new file mode 100644 index 0000000..6172dd5 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/permutations.h @@ -0,0 +1,184 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ 
+ ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) 
{ + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include 
"ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/random.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/random.c new file mode 100644 index 0000000..81d0081 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/random.c @@ -0,0 +1,62 @@ + +#include "random.h" + +#include +#include + +#include "config.h" + +#if ASCON_MASK_RNG == 'R' + +void randinit() { srand(time(0)); } + +uint32_t rand32() { + uint32_t r; + randombytes(&r, 4); + return r; +} + +uint64_t rand64() { + uint64_t r; + randombytes(&r, 8); + return r; +} + +#elif ASCON_MASK_RNG == 'S' + +void randinit() { srand(time(0)); } + +uint32_t rand32() { return ((uint32_t)rand() << 21) ^ rand(); } + +uint64_t rand64() { + return ((uint64_t)rand() << 43) ^ ((uint64_t)rand() << 21) ^ rand(); +} + +#elif ASCON_MASK_RNG == 'X' + +uint32_t xorshift32; +uint64_t xorshift64; + +void randinit() { + srand(time(0)); + xorshift32 = rand(); + xorshift64 = (uint64_t)rand() << 32 | rand(); +} + +uint32_t rand32() { + uint32_t x = xorshift32; + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + return xorshift32 = x; +} + +uint64_t rand64() { + uint64_t x = xorshift64; + x ^= x << 13; + x ^= x >> 7; + x ^= x << 17; + return xorshift64 = x; +} + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/random.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/random.h new file mode 100644 index 0000000..89226b6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/random.h @@ -0,0 +1,10 @@ +#ifndef 
RANDOM_H_ +#define RANDOM_H_ + +#include + +void randinit(); +uint32_t rand32(); +uint64_t rand64(); + +#endif /* RANDOM_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/round.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/round.h new file mode 100644 index 0000000..4b27aaa --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/round.h @@ -0,0 +1,75 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" +#include "random.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = TOSHARES(0); + *K1 = TOSHARES(0); + *K2 = TOSHARES(0); +} + +__forceinline void PINIT(state_t* s) { + randinit(); + s->x0 = TOSHARES(0); + s->x1 = TOSHARES(0); + s->x2 = TOSHARES(0); + s->x3 = TOSHARES(0); + s->x4 = TOSHARES(0); + s->rx = TOSHARES(0); +} + +#define TOFFOLI(a0, a1, b0, b1, c0, c1) \ + do { \ + (a0) ^= (~(b0)) & (c1); \ + (a0) ^= (~(b0)) & (c0); \ + (a1) ^= (b1) & (c1); \ + (a1) ^= (b1) & (c0); \ + } while (0) + +__forceinline void ROUND(state_t* s, uint64_t C) { + /* refresh randomness */ + /* s->rx = TOSHARES(0); */ + /* addition of round constant */ + s->x2.s0 ^= C; + /* substitution layer */ + s->x0.s0 ^= s->x4.s0; + s->x4.s0 ^= s->x3.s0; + s->x2.s0 ^= s->x1.s0; + s->x0.s1 ^= s->x4.s1; + s->x4.s1 ^= s->x3.s1; + s->x2.s1 ^= s->x1.s1; + /* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */ + s->rx.s0 = s->rx.s1; + TOFFOLI(s->rx.s0, s->rx.s1, s->x4.s0, s->x4.s1, s->x0.s0, s->x0.s1); + TOFFOLI(s->x0.s0, s->x0.s1, s->x1.s0, s->x1.s1, s->x2.s0, s->x2.s1); + TOFFOLI(s->x2.s0, s->x2.s1, s->x3.s0, s->x3.s1, s->x4.s0, s->x4.s1); + TOFFOLI(s->x4.s0, s->x4.s1, s->x0.s0, s->x0.s1, s->x1.s0, s->x1.s1); + TOFFOLI(s->x1.s0, s->x1.s1, s->x2.s0, s->x2.s1, s->x3.s0, s->x3.s1); + s->x3.s1 ^= s->rx.s1; + s->x3.s0 ^= s->rx.s0; + /* end of shared keccak s-box */ + s->x1.s0 ^= s->x0.s0; + s->x0.s0 ^= s->x4.s0; + s->x3.s0 ^= s->x2.s0; + s->x2.s0 = ~s->x2.s0; 
+ s->x1.s1 ^= s->x0.s1; + s->x0.s1 ^= s->x4.s1; + s->x3.s1 ^= s->x2.s1; + /* linear diffusion layer */ + s->x0.s1 ^= ROR64(s->x0.s1, 19) ^ ROR64(s->x0.s1, 28); + s->x1.s1 ^= ROR64(s->x1.s1, 61) ^ ROR64(s->x1.s1, 39); + s->x2.s1 ^= ROR64(s->x2.s1, 1) ^ ROR64(s->x2.s1, 6); + s->x3.s1 ^= ROR64(s->x3.s1, 10) ^ ROR64(s->x3.s1, 17); + s->x4.s1 ^= ROR64(s->x4.s1, 7) ^ ROR64(s->x4.s1, 41); + s->x0.s0 ^= ROR64(s->x0.s0, 19) ^ ROR64(s->x0.s0, 28); + s->x1.s0 ^= ROR64(s->x1.s0, 61) ^ ROR64(s->x1.s0, 39); + s->x2.s0 ^= ROR64(s->x2.s0, 1) ^ ROR64(s->x2.s0, 6); + s->x3.s0 ^= ROR64(s->x3.s0, 10) ^ ROR64(s->x3.s0, 17); + s->x4.s0 ^= ROR64(s->x4.s0, 7) ^ ROR64(s->x4.s0, 41); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/word.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/word.h new file mode 100644 index 0000000..6f1effc --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_2shares/word.h @@ -0,0 +1,80 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" +#include "random.h" + +typedef uint64_t share_t; + +typedef struct { + share_t s0; + share_t s1; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { return (word_t){x, 0}; } + +__forceinline uint64_t UINT64_T(word_t w) { return w.s0; } + +__forceinline word_t TOSHARES(share_t in) { + uint64_t r0 = rand64(); + return (word_t){in ^ r0, r0}; +} + +__forceinline share_t FROMSHARES(word_t in) { return in.s0 ^ in.s1; } + +__forceinline word_t U64TOWORD(uint64_t x) { +#if ASCON_MASK_LOADS + return TOSHARES(x); +#else + return WORD_T(x); +#endif +} + +__forceinline uint64_t WORDTOU64(word_t w) { return FROMSHARES(w); } + +#define XOR(a, b) \ + do { \ + word_t t = b; \ + (a).s0 ^= t.s0; \ + (a).s1 ^= t.s1; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t ta = a; \ + word_t tb = b; \ + (a).s0 = (ta.s0 & tb.s0) ^ (ta.s0 & tb.s1); \ + (a).s1 = (ta.s1 & tb.s0) ^ (ta.s1 & tb.s1); \ + } while 
(0) + +__forceinline uint64_t ROR64(uint64_t x, int n) { + return x >> n | x << (64 - n); +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.s0 = lo2hi.s0 << 32 | hi2lo.s0 >> 32; + r.s1 = lo2hi.s1 << 32 | hi2lo.s1 >> 32; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) + result |= ((uint8_t*)&(a.s0))[i] ^ ((uint8_t*)&(a.s1))[i]; + for (int i = 0; i < 8; ++i) + result |= ((uint8_t*)&(b.s0))[i] ^ ((uint8_t*)&(b.s1))[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/api.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/api.h new file mode 100644 index 0000000..96a7c47 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/api.h @@ -0,0 +1,6 @@ +#define CRYPTO_KEYBYTES 16 +#define CRYPTO_NSECBYTES 0 +#define CRYPTO_NPUBBYTES 16 +#define CRYPTO_ABYTES 16 +#define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/ascon.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/ascon.c new file mode 100644 index 0000000..9011a77 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/ascon.c @@ -0,0 +1,250 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + XOR(*K1, LOAD64(k)); + XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const 
uint8_t* npub, const uint8_t* k) { + word_t N0, N1; + word_t K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* initialization */ + PINIT(s); + XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + XOR(s->x1, K1); + XOR(s->x2, K2); + XOR(s->x3, N0); + XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) XOR(*px, LOAD(ad, adlen)); + XOR(*px, PAD(adlen)); + PB(s); + } + XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + 
XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + XOR(*px, cx); + STORE(m, *px, clen); + AND(*px, XMASK(clen)); + XOR(*px, cx); + } + XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} + +#if !ASCON_INLINE_MODE + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + init(s, npub, k); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { final(s, k); } + +#else + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#endif + +int crypto_aead_encrypt(uint8_t* c, 
uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, k); + /* set tag */ + c += mlen; + STORE64(c, s.x3); + STORE64(c + 8, s.x4); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, k); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + XOR(s.x3, LOAD64(c)); + XOR(s.x4, LOAD64(c + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/ascon.h new file mode 100644 index 0000000..d11fa01 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/ascon.h @@ -0,0 +1,20 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; + word_t rx; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/config.h 
b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/config.h new file mode 100644 index 0000000..87b8b74 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/config.h @@ -0,0 +1,44 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 1 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'H' +#endif + +/* Ascon mask generator { 'S'tdlib, 'X'orshift, 'R'andombytes } */ +#ifndef ASCON_MASK_RNG +#define ASCON_MASK_RNG 'X' +#endif + +/* mask key/data loads */ +#ifndef ASCON_MASK_LOADS +#define ASCON_MASK_LOADS 0 +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/endian.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using 
macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/implementors b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/loadstore.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = 
U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t 
x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/permutations.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/permutations.c new file mode 100644 index 0000000..3aecaa6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/permutations.c @@ -0,0 +1,26 @@ +#include "permutations.h" + +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/permutations.h new file mode 100644 index 0000000..6172dd5 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/permutations.h @@ -0,0 +1,184 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + 
((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 
0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/printstate.h @@ 
-0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/random.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/random.c new file mode 100644 index 0000000..81d0081 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/random.c @@ -0,0 +1,62 @@ + +#include "random.h" + +#include +#include + +#include "config.h" + +#if ASCON_MASK_RNG == 'R' + +void randinit() { srand(time(0)); } + +uint32_t rand32() { + uint32_t r; + randombytes(&r, 4); + return r; +} + +uint64_t rand64() { + uint64_t r; + randombytes(&r, 8); + return r; +} + +#elif ASCON_MASK_RNG == 'S' + +void randinit() { srand(time(0)); } + +uint32_t rand32() { return ((uint32_t)rand() << 21) ^ rand(); } + +uint64_t rand64() { + return ((uint64_t)rand() << 43) ^ ((uint64_t)rand() << 21) ^ rand(); +} + +#elif ASCON_MASK_RNG == 'X' + +uint32_t xorshift32; +uint64_t xorshift64; + +void randinit() { + srand(time(0)); + xorshift32 = rand(); + xorshift64 = (uint64_t)rand() << 32 | rand(); +} + +uint32_t rand32() { + uint32_t x = xorshift32; + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + return xorshift32 = x; +} + +uint64_t rand64() { + uint64_t x = xorshift64; + x ^= x << 13; + x ^= x >> 7; + x ^= x << 17; + return xorshift64 = x; +} + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/random.h 
b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/random.h new file mode 100644 index 0000000..89226b6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/random.h @@ -0,0 +1,10 @@ +#ifndef RANDOM_H_ +#define RANDOM_H_ + +#include + +void randinit(); +uint32_t rand32(); +uint64_t rand64(); + +#endif /* RANDOM_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/round.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/round.h new file mode 100644 index 0000000..ef5ed5e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/round.h @@ -0,0 +1,98 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" +#include "random.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = TOSHARES(0); + *K1 = TOSHARES(0); + *K2 = TOSHARES(0); +} + +__forceinline void PINIT(state_t* s) { + randinit(); + s->x0 = TOSHARES(0); + s->x1 = TOSHARES(0); + s->x2 = TOSHARES(0); + s->x3 = TOSHARES(0); + s->x4 = TOSHARES(0); + s->rx = TOSHARES(0); +} + +#define TOFFOLI(a0, a1, a2, b0, b1, b2, c0, c1, c2) \ + do { \ + (a0) ^= (~(b0)) & (c0); \ + (a0) ^= (b0) & (c2); \ + (a0) ^= (b2) & (c0); \ + (a1) ^= (~(b1)) & (c1); \ + (a1) ^= (b1) & (c0); \ + (a1) ^= (b0) & (c1); \ + (a2) ^= (~(b2)) & (c2); \ + (a2) ^= (b2) & (c1); \ + (a2) ^= (b1) & (c2); \ + } while (0) + +__forceinline void ROUND(state_t* s, uint64_t C) { + /* refresh randomness */ + /* s->rx = TOSHARES(0); */ + /* addition of round constant */ + s->x2.s0 ^= C; + /* substitution layer */ + s->x0.s0 ^= s->x4.s0; + s->x4.s0 ^= s->x3.s0; + s->x2.s0 ^= s->x1.s0; + s->x0.s1 ^= s->x4.s1; + s->x4.s1 ^= s->x3.s1; + s->x2.s1 ^= s->x1.s1; + s->x0.s2 ^= s->x4.s2; + s->x4.s2 ^= s->x3.s2; + s->x2.s2 ^= s->x1.s2; + /* start of shared keccak s-box from https://eprint.iacr.org/2019/536 */ + s->rx.s2 = s->rx.s0; + s->rx.s0 ^= s->rx.s1; + TOFFOLI(s->rx.s0, s->rx.s1, s->rx.s2, s->x4.s0, s->x4.s1, 
s->x4.s2, s->x0.s0, + s->x0.s1, s->x0.s2); + TOFFOLI(s->x0.s0, s->x0.s1, s->x0.s2, s->x1.s0, s->x1.s1, s->x1.s2, s->x2.s0, + s->x2.s1, s->x2.s2); + TOFFOLI(s->x2.s0, s->x2.s1, s->x2.s2, s->x3.s0, s->x3.s1, s->x3.s2, s->x4.s0, + s->x4.s1, s->x4.s2); + TOFFOLI(s->x4.s0, s->x4.s1, s->x4.s2, s->x0.s0, s->x0.s1, s->x0.s2, s->x1.s0, + s->x1.s1, s->x1.s2); + TOFFOLI(s->x1.s0, s->x1.s1, s->x1.s2, s->x2.s0, s->x2.s1, s->x2.s2, s->x3.s0, + s->x3.s1, s->x3.s2); + s->x3.s2 ^= s->rx.s2; + s->x3.s1 ^= s->rx.s1; + s->x3.s0 ^= s->rx.s0; + /* end of shared keccak s-box */ + s->x1.s0 ^= s->x0.s0; + s->x0.s0 ^= s->x4.s0; + s->x3.s0 ^= s->x2.s0; + s->x2.s0 = ~s->x2.s0; + s->x1.s1 ^= s->x0.s1; + s->x0.s1 ^= s->x4.s1; + s->x3.s1 ^= s->x2.s1; + s->x1.s2 ^= s->x0.s2; + s->x0.s2 ^= s->x4.s2; + s->x3.s2 ^= s->x2.s2; + /* linear diffusion layer */ + s->x0.s2 ^= ROR64(s->x0.s2, 19) ^ ROR64(s->x0.s2, 28); + s->x1.s2 ^= ROR64(s->x1.s2, 61) ^ ROR64(s->x1.s2, 39); + s->x2.s2 ^= ROR64(s->x2.s2, 1) ^ ROR64(s->x2.s2, 6); + s->x3.s2 ^= ROR64(s->x3.s2, 10) ^ ROR64(s->x3.s2, 17); + s->x4.s2 ^= ROR64(s->x4.s2, 7) ^ ROR64(s->x4.s2, 41); + s->x0.s1 ^= ROR64(s->x0.s1, 19) ^ ROR64(s->x0.s1, 28); + s->x1.s1 ^= ROR64(s->x1.s1, 61) ^ ROR64(s->x1.s1, 39); + s->x2.s1 ^= ROR64(s->x2.s1, 1) ^ ROR64(s->x2.s1, 6); + s->x3.s1 ^= ROR64(s->x3.s1, 10) ^ ROR64(s->x3.s1, 17); + s->x4.s1 ^= ROR64(s->x4.s1, 7) ^ ROR64(s->x4.s1, 41); + s->x0.s0 ^= ROR64(s->x0.s0, 19) ^ ROR64(s->x0.s0, 28); + s->x1.s0 ^= ROR64(s->x1.s0, 61) ^ ROR64(s->x1.s0, 39); + s->x2.s0 ^= ROR64(s->x2.s0, 1) ^ ROR64(s->x2.s0, 6); + s->x3.s0 ^= ROR64(s->x3.s0, 10) ^ ROR64(s->x3.s0, 17); + s->x4.s0 ^= ROR64(s->x4.s0, 7) ^ ROR64(s->x4.s0, 41); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/word.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/word.h new file mode 100644 index 0000000..b18e527 --- /dev/null +++ 
b/ascon/Implementations/crypto_aead/ascon128v12/opt64_3shares/word.h @@ -0,0 +1,87 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" +#include "random.h" + +typedef uint64_t share_t; + +typedef struct { + share_t s0; + share_t s1; + share_t s2; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { return (word_t){x, 0, 0}; } + +__forceinline uint64_t UINT64_T(word_t w) { return w.s0; } + +__forceinline word_t TOSHARES(share_t in) { + uint64_t r0 = rand64(); + uint64_t r1 = rand64(); + return (word_t){in ^ r0 ^ r1, r0, r1}; +} + +__forceinline share_t FROMSHARES(word_t in) { return in.s0 ^ in.s1 ^ in.s2; } + +__forceinline word_t U64TOWORD(uint64_t x) { +#if ASCON_MASK_LOADS + return TOSHARES(x); +#else + return WORD_T(x); +#endif +} + +__forceinline uint64_t WORDTOU64(word_t w) { return FROMSHARES(w); } + +#define XOR(a, b) \ + do { \ + word_t t = b; \ + (a).s0 ^= t.s0; \ + (a).s1 ^= t.s1; \ + (a).s2 ^= t.s2; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t ta = a; \ + word_t tb = b; \ + (a).s0 = (ta.s0 & tb.s0) ^ (ta.s0 & tb.s1) ^ (ta.s0 & tb.s2); \ + (a).s1 = (ta.s1 & tb.s0) ^ (ta.s1 & tb.s1) ^ (ta.s1 & tb.s2); \ + (a).s2 = (ta.s2 & tb.s0) ^ (ta.s2 & tb.s1) ^ (ta.s2 & tb.s2); \ + } while (0) + +__forceinline uint64_t ROR64(uint64_t x, int n) { + return x >> n | x << (64 - n); +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.s0 = lo2hi.s0 << 32 | hi2lo.s0 >> 32; + r.s1 = lo2hi.s1 << 32 | hi2lo.s1 >> 32; + r.s2 = lo2hi.s2 << 32 | hi2lo.s2 >> 32; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) + result |= ((uint8_t*)&(a.s0))[i] ^ ((uint8_t*)&(a.s1))[i] ^ + ((uint8_t*)&(a.s2))[i]; + for (int i = 0; i < 8; ++i) + result |= ((uint8_t*)&(b.s0))[i] ^ ((uint8_t*)&(b.s1))[i] ^ + ((uint8_t*)&(b.s2))[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } + 
+/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/api.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/api.h index a4aa567..96a7c47 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/api.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/api.h @@ -3,3 +3,4 @@ #define CRYPTO_NPUBBYTES 16 #define CRYPTO_ABYTES 16 #define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/ascon.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/ascon.c new file mode 100644 index 0000000..5e60f1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/ascon.c @@ -0,0 +1,63 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode) { + word_t N0, N1, K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + if (CRYPTO_KEYBYTES == 20) { + K0 = KEYROT(WORD_T(0), LOAD(k, 4)); + k += 4; + } + K1 = LOAD64(k); + K2 = LOAD64(k + 8); + /* initialization */ + s->x0 = IV; + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + s->x1 = K1; + s->x2 = K2; + s->x3 = N0; + s->x4 = N1; + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); + /* process associated data */ + if (adlen) { + process_data(s, (void*)0, ad, adlen, ASCON_AD); + PB(s); + } + XOR(s->x4, WORD_T(1)); + /* process plaintext/ciphertext */ + process_data(s, out, in, tlen, 
mode); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/ascon.h new file mode 100644 index 0000000..aa685d3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/ascon.h @@ -0,0 +1,24 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +#define ASCON_AD 0 +#define ASCON_ENC 1 +#define ASCON_DEC 2 + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/config.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/config.h new file mode 100644 index 0000000..ec8bd6f --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data 
access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'H' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/decrypt.c index 0cde81e..605bd03 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/decrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/decrypt.c @@ -1,29 +1,32 @@ -#include "core.h" +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" -int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, - unsigned char* nsec, const unsigned char* c, - unsigned long long clen, const unsigned char* ad, - unsigned long long adlen, const unsigned char* npub, - const unsigned char* k) { +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { if (clen < CRYPTO_ABYTES) { *mlen = 0; return -1; } - - state s; + state_t s; (void)nsec; - - // set plaintext size + /* set plaintext size */ *mlen = clen - CRYPTO_ABYTES; - + /* ascon decryption */ ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); - - // verify tag (should be constant time, check compiler output) - if (((s.x3 ^ U64BIG(*(u64*)(c + *mlen))) | - (s.x4 ^ U64BIG(*(u64*)(c + *mlen + 8)))) != 0) { + /* verify tag (should be constant time, check compiler output) */ + XOR(s.x3, LOAD64(c + *mlen)); + XOR(s.x4, LOAD64(c + *mlen + 8)); + if (NOTZERO(s.x3, s.x4)) { *mlen = 0; return -1; } - 
return 0; } diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/encrypt.c index 5961c60..641b266 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/encrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/encrypt.c @@ -1,21 +1,25 @@ -#include "core.h" +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" -int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, - const unsigned char* m, unsigned long long mlen, - const unsigned char* ad, unsigned long long adlen, - const unsigned char* nsec, const unsigned char* npub, - const unsigned char* k) { - state s; - (void)nsec; +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); - // set ciphertext size +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + /* set ciphertext size */ *clen = mlen + CRYPTO_ABYTES; - + /* ascon encryption */ ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); - - // set tag - *(u64*)(c + mlen) = U64BIG(s.x3); - *(u64*)(c + mlen + 8) = U64BIG(s.x4); - + /* set tag */ + STORE64(c + mlen, s.x3); + STORE64(c + mlen + 8, s.x4); return 0; } diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/endian.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/endian.h index b4d18f5..3944360 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/endian.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/endian.h @@ -3,7 +3,10 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian 
machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) @@ -11,19 +14,26 @@ #elif defined(_MSC_VER) || \ (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \ - (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \ - (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \ - (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56)) -#define U32BIG(x) \ - ((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \ - (((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24)) -#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/loadstore.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ 
b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + 
return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.c index 9aaf9d1..3aecaa6 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.c +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.c @@ -1,8 +1,26 @@ #include "permutations.h" -void P(state *p, u8 rounds) { - state s = *p; - u8 i, start = START_CONSTANT(rounds); - for (i = start; i > 0x4a; i -= 0x0f) ROUND(i); - *p = s; +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); } + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git 
a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.h index 7143e82..6172dd5 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/permutations.h @@ -1,66 +1,184 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -typedef unsigned char u8; -typedef unsigned long long u64; - -typedef struct { - u64 x0, x1, x2, x3, x4; -} state; - -#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) -#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) -#define START_CONSTANT(x) (((0xf - (12 - (x))) << 4) | (12 - (x))) - -#define ROUND(C) \ - do { \ - state t; \ - s.x2 ^= C; \ - s.x0 ^= s.x4; \ - s.x4 ^= s.x3; \ - s.x2 ^= s.x1; \ - t.x0 = s.x0; \ - t.x4 = s.x4; \ - t.x3 = s.x3; \ - t.x1 = s.x1; \ - t.x2 = s.x2; \ - s.x0 = t.x0 ^ ((~t.x1) & t.x2); \ - s.x2 = t.x2 ^ ((~t.x3) & t.x4); \ - s.x4 = t.x4 ^ ((~t.x0) & t.x1); \ - s.x1 = t.x1 ^ ((~t.x2) & t.x3); \ - s.x3 = t.x3 ^ ((~t.x4) & t.x0); \ - s.x1 ^= s.x0; \ - t.x1 = s.x1; \ - s.x1 = ROTR64(s.x1, 39); \ - s.x3 ^= s.x2; \ - t.x2 = s.x2; \ - s.x2 = ROTR64(s.x2, 1); \ - t.x4 = s.x4; \ - t.x2 ^= s.x2; \ - s.x2 = ROTR64(s.x2, 6 - 1); \ - t.x3 = s.x3; \ - t.x1 ^= s.x1; \ - s.x3 = ROTR64(s.x3, 10); \ - s.x0 ^= s.x4; \ - s.x4 = ROTR64(s.x4, 7); \ - t.x3 ^= s.x3; \ - s.x2 ^= t.x2; \ - s.x1 = ROTR64(s.x1, 61 - 39); \ - t.x0 = s.x0; \ - s.x2 = ~s.x2; \ - s.x3 = ROTR64(s.x3, 17 - 10); \ - t.x4 ^= s.x4; \ - s.x4 = ROTR64(s.x4, 41 - 7); \ - s.x3 ^= t.x3; \ - s.x1 ^= t.x1; \ - s.x0 = ROTR64(s.x0, 19); \ - s.x4 ^= t.x4; \ - t.x0 ^= s.x0; \ - s.x0 = ROTR64(s.x0, 28 - 19); \ - s.x0 ^= t.x0; \ - } while (0) - -void P(state *p, u8 rounds); - -#endif // PERMUTATIONS_H_ +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define 
ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define 
PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i 
== 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/process.c b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/process.c new file mode 100644 index 0000000..81fdd15 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/process.c @@ -0,0 +1,31 @@ +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode) { + word_t tmp0; + int n = 0; + while (len) { + /* determine block size */ + n = len < ASCON_RATE ? 
len : ASCON_RATE; + /* absorb data */ + tmp0 = LOAD(in, n); + XOR(s->x0, tmp0); + /* extract data */ + if (mode != ASCON_AD) STORE(out, s->x0, n); + /* insert data */ + if (mode == ASCON_DEC) { + AND(s->x0, XMASK(n)); + XOR(s->x0, tmp0); + } + /* compute permutation for full blocks */ + if (n == ASCON_RATE) PB(s); + in += n; + out += n; + len -= n; + } + XOR(s->x0, PAD(n % 8)); +} diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/round.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/round.h new file mode 100644 index 0000000..077cbfd --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/round.h @@ -0,0 +1,69 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint64_t C) { + state_t t; + s->x2 ^= C; + s->x0 ^= s->x4; + s->x4 ^= s->x3; + s->x2 ^= s->x1; + t.x0 = s->x0; + t.x4 = s->x4; + t.x3 = s->x3; + t.x1 = s->x1; + t.x2 = s->x2; + s->x0 = t.x0 ^ (~t.x1 & t.x2); + s->x2 = t.x2 ^ (~t.x3 & t.x4); + s->x4 = t.x4 ^ (~t.x0 & t.x1); + s->x1 = t.x1 ^ (~t.x2 & t.x3); + s->x3 = t.x3 ^ (~t.x4 & t.x0); + s->x1 ^= s->x0; + t.x1 = s->x1; + s->x1 = ROR64(s->x1, 39); + s->x3 ^= s->x2; + t.x2 = s->x2; + s->x2 = ROR64(s->x2, 1); + t.x4 = s->x4; + t.x2 ^= s->x2; + s->x2 = ROR64(s->x2, 6 - 1); + t.x3 = s->x3; + t.x1 ^= s->x1; + s->x3 = ROR64(s->x3, 10); + s->x0 ^= s->x4; + s->x4 = ROR64(s->x4, 7); + t.x3 ^= s->x3; + s->x2 ^= t.x2; + s->x1 = ROR64(s->x1, 61 - 39); + t.x0 = s->x0; + s->x2 = ~s->x2; + s->x3 = ROR64(s->x3, 17 - 10); + t.x4 ^= s->x4; + s->x4 = ROR64(s->x4, 41 - 7); + s->x3 ^= t.x3; + s->x1 ^= t.x1; + s->x0 = ROR64(s->x0, 19); + s->x4 ^= t.x4; + t.x0 ^= s->x0; + 
s->x0 = ROR64(s->x0, 28 - 19); + s->x0 ^= t.x0; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/word.h b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/word.h new file mode 100644 index 0000000..5d601bb --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/opt64_lowsize/word.h @@ -0,0 +1,47 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef uint64_t word_t; + +#define WORD_T +#define UINT64_T + +#define U64TOWORD +#define WORDTOU64 + +#define XOR(a, b) \ + do { \ + (a) ^= (b); \ + } while (0) + +#define AND(a, b) \ + do { \ + (a) &= (b); \ + } while (0) + +__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + return lo2hi << 32 | hi2lo >> 32; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/ref/ascon.h b/ascon/Implementations/crypto_aead/ascon128v12/ref/ascon.h new file mode 100644 index 0000000..8ab0502 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/ref/ascon.h @@ -0,0 +1,14 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#define WORDTOU64 + +typedef uint64_t word_t; + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/ref/decrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/ref/decrypt.c index 308e40f..cfda43e 
100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/ref/decrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128v12/ref/decrypt.c @@ -1,94 +1,90 @@ #include "api.h" +#include "ascon.h" +#include "loadstore.h" #include "permutations.h" +#include "printstate.h" -#define RATE (64 / 8) -#define PA_ROUNDS 12 -#define PB_ROUNDS 6 -#define IV \ - ((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \ - (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32) +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + uint64_t K0, K1, N0, N1; + state_t s; + (void)nsec; -int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, - unsigned char* nsec, const unsigned char* c, - unsigned long long clen, const unsigned char* ad, - unsigned long long adlen, const unsigned char* npub, - const unsigned char* k) { if (clen < CRYPTO_ABYTES) { *mlen = 0; return -1; } - const u64 K0 = BYTES_TO_U64(k, 8); - const u64 K1 = BYTES_TO_U64(k + 8, 8); - const u64 N0 = BYTES_TO_U64(npub, 8); - const u64 N1 = BYTES_TO_U64(npub + 8, 8); - state s; - u64 c0; - (void)nsec; - - // set plaintext size + /* set plaintext size */ *mlen = clen - CRYPTO_ABYTES; - // initialization - s.x0 = IV; + /* load key and nonce */ + K0 = LOAD(k, 8); + K1 = LOAD(k + 8, 8); + N0 = LOAD(npub, 8); + N1 = LOAD(npub + 8, 8); + + /* initialization */ + s.x0 = ASCON_128_IV; s.x1 = K0; s.x2 = K1; s.x3 = N0; s.x4 = N1; - printstate("initial value:", s); P12(&s); s.x3 ^= K0; s.x4 ^= K1; - printstate("initialization:", s); + printstate("initialization", &s); - // process associated data + /* process associated data */ if (adlen) { - while (adlen >= RATE) { - s.x0 ^= BYTES_TO_U64(ad, 8); + while (adlen >= ASCON_128_RATE) { + s.x0 ^= LOAD(ad, 8); P6(&s); - adlen -= RATE; - ad += RATE; + ad += ASCON_128_RATE; + adlen -= ASCON_128_RATE; } - s.x0 ^= BYTES_TO_U64(ad, adlen); - s.x0 ^= 
0x80ull << (56 - 8 * adlen); + /* final associated data block */ + s.x0 ^= LOAD(ad, adlen); + s.x0 ^= PAD(adlen); P6(&s); } s.x4 ^= 1; - printstate("process associated data:", s); + printstate("process associated data", &s); - // process plaintext + /* process ciphertext */ clen -= CRYPTO_ABYTES; - while (clen >= RATE) { - c0 = BYTES_TO_U64(c, 8); - U64_TO_BYTES(m, s.x0 ^ c0, 8); + while (clen >= ASCON_128_RATE) { + uint64_t c0 = LOAD(c, 8); + STORE(m, s.x0 ^ c0, 8); s.x0 = c0; P6(&s); - clen -= RATE; - m += RATE; - c += RATE; + m += ASCON_128_RATE; + c += ASCON_128_RATE; + clen -= ASCON_128_RATE; } - c0 = BYTES_TO_U64(c, clen); - U64_TO_BYTES(m, s.x0 ^ c0, clen); - s.x0 &= ~BYTE_MASK(clen); + /* final ciphertext block */ + uint64_t c0 = LOAD(c, clen); + STORE(m, s.x0 ^ c0, clen); + s.x0 &= ~MASK(clen); s.x0 |= c0; - s.x0 ^= 0x80ull << (56 - 8 * clen); + s.x0 ^= PAD(clen); c += clen; - printstate("process plaintext:", s); + printstate("process ciphertext", &s); - // finalization + /* finalization */ s.x1 ^= K0; s.x2 ^= K1; P12(&s); s.x3 ^= K0; s.x4 ^= K1; - printstate("finalization:", s); + printstate("finalization", &s); - // verify tag (should be constant time, check compiler output) - if (((s.x3 ^ BYTES_TO_U64(c, 8)) | (s.x4 ^ BYTES_TO_U64(c + 8, 8))) != 0) { + /* verify tag (should be constant time, check compiler output) */ + if ((s.x3 ^ LOAD(c, 8)) | (s.x4 ^ LOAD(c + 8, 8))) { *mlen = 0; return -1; } return 0; } - diff --git a/ascon/Implementations/crypto_aead/ascon128v12/ref/encrypt.c b/ascon/Implementations/crypto_aead/ascon128v12/ref/encrypt.c index 8aee510..cd76506 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/ref/encrypt.c +++ b/ascon/Implementations/crypto_aead/ascon128v12/ref/encrypt.c @@ -1,82 +1,80 @@ #include "api.h" +#include "ascon.h" +#include "loadstore.h" #include "permutations.h" +#include "printstate.h" -#define RATE (64 / 8) -#define PA_ROUNDS 12 -#define PB_ROUNDS 6 -#define IV \ - ((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 
* (RATE)) << 48 | \ - (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32) - -int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, - const unsigned char* m, unsigned long long mlen, - const unsigned char* ad, unsigned long long adlen, - const unsigned char* nsec, const unsigned char* npub, - const unsigned char* k) { - const u64 K0 = BYTES_TO_U64(k, 8); - const u64 K1 = BYTES_TO_U64(k + 8, 8); - const u64 N0 = BYTES_TO_U64(npub, 8); - const u64 N1 = BYTES_TO_U64(npub + 8, 8); - state s; +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + uint64_t K0, K1, N0, N1; + state_t s; (void)nsec; - // set ciphertext size + /* set ciphertext size */ *clen = mlen + CRYPTO_ABYTES; - // initialization - s.x0 = IV; + /* load key and nonce */ + K0 = LOAD(k, 8); + K1 = LOAD(k + 8, 8); + N0 = LOAD(npub, 8); + N1 = LOAD(npub + 8, 8); + + /* initialization */ + s.x0 = ASCON_128_IV; s.x1 = K0; s.x2 = K1; s.x3 = N0; s.x4 = N1; - printstate("initial value:", s); P12(&s); s.x3 ^= K0; s.x4 ^= K1; - printstate("initialization:", s); + printstate("initialization", &s); - // process associated data + /* process associated data */ if (adlen) { - while (adlen >= RATE) { - s.x0 ^= BYTES_TO_U64(ad, 8); + while (adlen >= ASCON_128_RATE) { + s.x0 ^= LOAD(ad, 8); P6(&s); - adlen -= RATE; - ad += RATE; + ad += ASCON_128_RATE; + adlen -= ASCON_128_RATE; } - s.x0 ^= BYTES_TO_U64(ad, adlen); - s.x0 ^= 0x80ull << (56 - 8 * adlen); + /* final associated data block */ + s.x0 ^= LOAD(ad, adlen); + s.x0 ^= PAD(adlen); P6(&s); } s.x4 ^= 1; - printstate("process associated data:", s); + printstate("process associated data", &s); - // process plaintext - while (mlen >= RATE) { - s.x0 ^= BYTES_TO_U64(m, 8); - U64_TO_BYTES(c, s.x0, 8); + /* process plaintext */ + while (mlen >= ASCON_128_RATE) { + s.x0 ^= LOAD(m, 8); + STORE(c, s.x0, 8); P6(&s); - mlen -= RATE; - m 
+= RATE; - c += RATE; + m += ASCON_128_RATE; + c += ASCON_128_RATE; + mlen -= ASCON_128_RATE; } - s.x0 ^= BYTES_TO_U64(m, mlen); - s.x0 ^= 0x80ull << (56 - 8 * mlen); - U64_TO_BYTES(c, s.x0, mlen); + /* final plaintext block */ + s.x0 ^= LOAD(m, mlen); + STORE(c, s.x0, mlen); + s.x0 ^= PAD(mlen); c += mlen; - printstate("process plaintext:", s); + printstate("process plaintext", &s); - // finalization + /* finalization */ s.x1 ^= K0; s.x2 ^= K1; P12(&s); s.x3 ^= K0; s.x4 ^= K1; - printstate("finalization:", s); + printstate("finalization", &s); - // set tag - U64_TO_BYTES(c, s.x3, 8); - U64_TO_BYTES(c + 8, s.x4, 8); + /* set tag */ + STORE(c, s.x3, 8); + STORE(c + 8, s.x4, 8); return 0; } - diff --git a/ascon/Implementations/crypto_aead/ascon128v12/ref/loadstore.h b/ascon/Implementations/crypto_aead/ascon128v12/ref/loadstore.h new file mode 100644 index 0000000..eccf0e3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/ref/loadstore.h @@ -0,0 +1,31 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +/* set padding byte in Ascon 64-bit word */ +#define PAD(i) SETBYTE(0x80, i) + +static inline uint64_t MASK(int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) x |= SETBYTE(0xff, i); + return x; +} + +static inline uint64_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) x |= SETBYTE(bytes[i], i); + return x; +} + +static inline void STORE(uint8_t* bytes, uint64_t x, int n) { + for (int i = 0; i < n; ++i) bytes[i] = GETBYTE(x, i); +} + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/ref/permutations.h b/ascon/Implementations/crypto_aead/ascon128v12/ref/permutations.h index 3317a9f..ef6dd68 100644 --- a/ascon/Implementations/crypto_aead/ascon128v12/ref/permutations.h 
/* key sizes in bytes */
#define ASCON_128_KEYBYTES 16
#define ASCON_128A_KEYBYTES 16
#define ASCON_80PQ_KEYBYTES 20

/* rate (bytes processed per permutation call) */
#define ASCON_128_RATE 8
#define ASCON_128A_RATE 16

/* number of permutation rounds */
#define ASCON_128_PA_ROUNDS 12
#define ASCON_128_PB_ROUNDS 6
#define ASCON_128A_PB_ROUNDS 8

/* hash output size in bytes */
#define ASCON_HASH_BYTES 32

/*
 * Initial values for word x0.
 * AEAD layout, from the most significant byte down:
 *   key size in bits | rate in bits | PA rounds | PB rounds | 32 zero bits
 */
#define ASCON_128_IV                            \
  (((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \
   ((uint64_t)(ASCON_128_RATE * 8) << 48) |     \
   ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) |    \
   ((uint64_t)(ASCON_128_PB_ROUNDS) << 32))

/* uses the variant's own key-size constant; the value is unchanged because
   ASCON_128_KEYBYTES and ASCON_128A_KEYBYTES are both 16 */
#define ASCON_128A_IV                            \
  (((uint64_t)(ASCON_128A_KEYBYTES * 8) << 56) | \
   ((uint64_t)(ASCON_128A_RATE * 8) << 48) |     \
   ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) |     \
   ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32))

#define ASCON_80PQ_IV                            \
  (((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \
   ((uint64_t)(ASCON_128_RATE * 8) << 48) |      \
   ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) |     \
   ((uint64_t)(ASCON_128_PB_ROUNDS) << 32))

/* hash layout: rate in bits, PA rounds, and the output size in bits in the
   low-order bits */
#define ASCON_HASH_IV                         \
  (((uint64_t)(ASCON_128_RATE * 8) << 48) |   \
   ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) |  \
   ((uint64_t)(ASCON_HASH_BYTES * 8) << 0))

/* same as the hash IV without an output size */
#define ASCON_XOF_IV                        \
  (((uint64_t)(ASCON_128_RATE * 8) << 48) | \
   ((uint64_t)(ASCON_128_PA_ROUNDS) << 40))
ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); } -static inline void P6(state* s) { - printstate(" permutation input:", *s); - ROUND(0x96, s); - ROUND(0x87, s); - ROUND(0x78, s); - ROUND(0x69, s); - ROUND(0x5a, s); - ROUND(0x4b, s); +static inline void P8(state_t* s) { + printstate(" permutation input", s); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); } -#endif // PERMUTATIONS_H_ +static inline void P6(state_t* s) { + printstate(" permutation input", s); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/ref/printstate.h b/ascon/Implementations/crypto_aead/ascon128v12/ref/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/ref/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon128v12/ref/round.h b/ascon/Implementations/crypto_aead/ascon128v12/ref/round.h new file mode 100644 index 0000000..dcb6c81 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon128v12/ref/round.h @@ -0,0 +1,40 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + 
/*
 * Rotates x right by n bit positions.
 *
 * Both shift counts are reduced modulo 64, so n == 0 (and any multiple of
 * 64) returns x unchanged instead of shifting a 64-bit value by 64 bits,
 * which is undefined in C. For 1 <= n <= 63 the result is identical to
 * (x << (64 - n)) | (x >> n).
 */
static inline uint64_t ROR64(uint64_t x, int n) {
  const unsigned right = (unsigned)n & 63u;
  const unsigned left = (64u - right) & 63u;
  return (x << left) | (x >> right);
}
/*
 * Absorbs the associated data into the state.
 *
 *   s      state to update
 *   ad     associated data
 *   adlen  length of ad in bytes
 *
 * Full blocks of ASCON_RATE bytes are XORed into x0 (and x1 when the rate is
 * 16) and each is followed by the permutation PB. The remaining bytes are
 * XORed in together with the padding byte and followed by one more PB. When
 * adlen is 0 the data processing, padding included, is skipped entirely.
 * In every case the function ends by XORing WORD_T(1) into x4.
 */
__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) {
  /* word that receives the last partial block and the padding */
  word_t* restrict px;
  /* process associated data */
  if (adlen) {
    while (adlen >= ASCON_RATE) {
      XOR(s->x0, LOAD64(ad));
      if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8));
      PB(s);
      ad += ASCON_RATE;
      adlen -= ASCON_RATE;
    }
    /* final associated data block */
    px = &s->x0;
    /* rate 16 only: a complete first word goes into x0, the tail into x1 */
    if (ASCON_RATE == 16 && adlen >= 8) {
      XOR(s->x0, LOAD64(ad));
      px = &s->x1;
      ad += 8;
      adlen -= 8;
    }
    /* adlen is now below 8; LOAD is skipped when nothing is left */
    if (adlen) XOR(*px, LOAD(ad, adlen));
    XOR(*px, PAD(adlen));
    PB(s);
  }
  /* applied whether or not associated data was present */
  XOR(s->x4, WORD_T(1));
  printstate("process associated data", s);
}
m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + XOR(*px, cx); + STORE(m, *px, clen); + AND(*px, XMASK(clen)); + XOR(*px, cx); + } + XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} + +#if !ASCON_INLINE_MODE + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + init(s, npub, k); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { final(s, k); } + +#else + +#define INIT 
init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, k); + /* set tag */ + c += mlen; + STORE64(c, s.x3); + STORE64(c + 8, s.x4); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, k); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + XOR(s.x3, LOAD64(c)); + XOR(s.x4, LOAD64(c + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/ascon.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/ascon.h new file mode 100644 index 0000000..10a5b6e --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // 
#ifndef CONFIG_H_
#define CONFIG_H_

/* Every option below can be overridden from the compiler command line. */

/* inline the Ascon mode */
#ifndef ASCON_INLINE_MODE
#define ASCON_INLINE_MODE 1
#endif

/* inline the Ascon permutations */
#ifndef ASCON_INLINE_PERM
#define ASCON_INLINE_PERM 0
#endif

/* single function for all permutations */
#ifndef ASCON_SINGLE_PERM
#define ASCON_SINGLE_PERM 0
#endif

/* unroll the permutation loops */
#ifndef ASCON_UNROLL_LOOPS
#define ASCON_UNROLL_LOOPS 0
#endif

/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */
#ifndef ASCON_DATA_ACCESS
#define ASCON_DATA_ACCESS 'M'
#endif

/* make sure __forceinline is supported
 *
 * MSVC provides __forceinline as a keyword, not as a macro, so a plain
 * #ifndef test is true there and would replace the keyword with the GCC
 * attribute syntax, which MSVC does not accept. Only supply the fallback
 * when the compiler is not MSVC-compatible and no macro exists yet. */
#if !defined(__forceinline) && !defined(_MSC_VER)
#define __forceinline inline __attribute__((always_inline))
#endif

#endif /* CONFIG_H_ */
/*
 * Wordwise load of a full 8-byte block.
 * Reads the 8 bytes at `bytes` through a uint64_t pointer, passes the value
 * through U64BIG (identity on big-endian hosts, byte swap on little-endian
 * hosts) and converts it to the internal word type.
 * NOTE(review): the pointer cast assumes the target tolerates 64-bit reads
 * at the alignment of `bytes` and that the compiler does not exploit
 * strict aliasing here — confirm before enabling ASCON_DATA_ACCESS 'W'.
 */
__forceinline word_t LOAD64(const uint8_t* bytes) {
  uint64_t x = U64BIG(*(uint64_t*)bytes);
  return U64TOWORD(x);
}

/*
 * Wordwise store of a full 8-byte block; the inverse of LOAD64.
 * Converts the internal word to a uint64_t, passes it through U64BIG and
 * writes all 8 bytes at `bytes` through a uint64_t pointer.
 * NOTE(review): same alignment/aliasing assumption as LOAD64 — confirm.
 */
__forceinline void STORE64(uint8_t* bytes, word_t w) {
  uint64_t x = WORDTOU64(w);
  *(uint64_t*)bytes = U64BIG(x);
}
U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git 
a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/permutations.c b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/permutations.c new file mode 100644 index 0000000..1bca2ef --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/permutations.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/permutations.h new file mode 100644 index 0000000..ef338f1 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/permutations.h @@ -0,0 +1,189 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | 
/* index of the first round constant for an n-round permutation (the round
   constant table has 12 entries and every permutation ends at the last one);
   the argument is parenthesised so that expressions such as START(a + b)
   expand correctly */
#define START(n) (12 - (n))
0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/printstate.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/printstate.h 
new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/round.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/round.h new file mode 100644 index 0000000..d8ea3b6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32/round.h @@ -0,0 +1,85 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + state_t t; + /* round constant */ + s->x2.e ^= C_e; + s->x2.o ^= C_o; + /* s-box layer */ + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + s->x4.e ^= s->x3.e; + s->x4.o ^= s->x3.o; + s->x2.e ^= s->x1.e; + s->x2.o ^= s->x1.o; + t.x0.e = s->x0.e; + t.x0.o = s->x0.o; + t.x4.e = s->x4.e; + t.x4.o = s->x4.o; + t.x3.e = s->x3.e; + t.x3.o = s->x3.o; + t.x1.e = s->x1.e; + t.x1.o = s->x1.o; + t.x2.e = s->x2.e; + t.x2.o = s->x2.o; + s->x0.e = t.x0.e ^ (~t.x1.e & t.x2.e); + s->x0.o = t.x0.o ^ (~t.x1.o & t.x2.o); + s->x2.e = t.x2.e ^ (~t.x3.e & t.x4.e); + s->x2.o = t.x2.o ^ 
/* 64-bit Ascon word stored as two 32-bit halves. */
typedef struct {
  uint32_t e; /* WORD_T() stores the low 32 bits of its argument here */
  uint32_t o; /* WORD_T() stores the high 32 bits of its argument here */
} word_t;

/* Split x into its two halves without any bit shuffling:
 * low 32 bits -> .e, high 32 bits -> .o. */
__forceinline word_t WORD_T(uint64_t x) {
  word_t halves;
  halves.e = (uint32_t)x;
  halves.o = (uint32_t)(x >> 32);
  return halves;
}

/* Inverse of WORD_T(): .o becomes the high half, .e the low half. */
__forceinline uint64_t UINT64_T(word_t x) {
  const uint64_t high = (uint64_t)x.o << 32;
  return high | x.e;
}
/* Rotate the 32-bit value x right by n bit positions.
 *
 * Both shift counts are reduced modulo 32, so n == 0 (or any multiple of 32)
 * returns x unchanged instead of evaluating x << 32, which is undefined for a
 * 32-bit operand. ROR64() reaches n == 0 for rotation amounts 0 and 1.
 * For 1 <= n <= 31 the result equals (x >> n) | (x << (32 - n)). */
__forceinline uint32_t ROR32(uint32_t x, int n) {
  const unsigned int right = (unsigned int)n & 31u;
  const unsigned int left = (32u - right) & 31u;
  return (x >> right) | (x << left);
}
/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
/* Convert a value whose low 32 bits hold the even-indexed bits and whose high
 * 32 bits hold the odd-indexed bits back to normal bit order. Each half is
 * first reassembled from 16-bit pieces, then run through four mask-and-shift
 * exchange steps with distances 8, 4, 2 and 1. */
__forceinline uint64_t FROMBI32(uint64_t in) {
  const uint32_t swap_mask[4] = {0x0000FF00, 0x00F000F0, 0x0C0C0C0C,
                                 0x22222222};
  const uint32_t low_half = (uint32_t)in;
  const uint32_t high_half = (uint32_t)(in >> 32);
  uint32_t lo = (low_half & 0x0000FFFF) | (high_half << 16);
  uint32_t hi = (low_half >> 16) | (high_half & 0xFFFF0000);
  int step;
  for (step = 0; step < 4; ++step) {
    const int dist = 8 >> step; /* 8, 4, 2, 1 */
    const uint32_t dlo = (lo ^ (lo >> dist)) & swap_mask[step];
    const uint32_t dhi = (hi ^ (hi >> dist)) & swap_mask[step];
    lo ^= dlo ^ (dlo << dist);
    hi ^= dhi ^ (dhi << dist);
  }
  return (uint64_t)hi << 32 | lo;
}
/* Shared AEAD flow used by both the encrypt and decrypt entry points.
 *
 * s     - state, fully overwritten here; on return s->x3 and s->x4 hold the
 *         two words the callers store or compare as the tag
 * out   - destination for the tlen processed bytes (passed to process_data)
 * in    - tlen input bytes (plaintext or ciphertext, depending on mode)
 * ad    - adlen bytes of associated data; skipped entirely when adlen == 0
 * npub  - nonce, read as two 8-byte words
 * k     - key; CRYPTO_KEYBYTES == 20 reads 4 extra leading bytes into K0
 * mode  - forwarded to process_data for the in/out pass
 */
void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen,
                const uint8_t* ad, uint64_t adlen, const uint8_t* npub,
                const uint8_t* k, uint8_t mode) {
  /* K0 is only assigned and only read under CRYPTO_KEYBYTES == 20 */
  word_t N0, N1, K0, K1, K2;
  /* load nonce */
  N0 = LOAD64(npub);
  N1 = LOAD64(npub + 8);
  /* load key */
  if (CRYPTO_KEYBYTES == 20) {
    /* first 4 key bytes, moved into position by KEYROT; the remaining 16
     * bytes are then loaded as K1/K2 below */
    K0 = KEYROT(WORD_T(0), LOAD(k, 4));
    k += 4;
  }
  K1 = LOAD64(k);
  K2 = LOAD64(k + 8);
  /* initialization */
  s->x0 = IV;
  if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0);
  s->x1 = K1;
  s->x2 = K2;
  s->x3 = N0;
  s->x4 = N1;
  P12(s);
  /* key is XORed into the state again after the permutation */
  if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0);
  XOR(s->x3, K1);
  XOR(s->x4, K2);
  printstate("initialization", s);
  /* process associated data */
  if (adlen) {
    /* AD mode produces no output, hence the null out pointer */
    process_data(s, (void*)0, ad, adlen, ASCON_AD);
    PB(s);
  }
  /* applied whether or not any associated data was processed */
  XOR(s->x4, WORD_T(1));
  /* process plaintext/ciphertext */
  process_data(s, out, in, tlen, mode);
  /* finalization */
  /* where the key is XORed in depends on key size and rate; exactly one of
   * the three branches below matches a given build configuration */
  if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) {
    XOR(s->x1, K1);
    XOR(s->x2, K2);
  }
  if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) {
    XOR(s->x2, K1);
    XOR(s->x3, K2);
  }
  if (CRYPTO_KEYBYTES == 20) {
    XOR(s->x1, KEYROT(K0, K1));
    XOR(s->x2, KEYROT(K1, K2));
    XOR(s->x3, KEYROT(K2, WORD_T(0)));
  }
  P12(s);
  /* final key XOR; x3/x4 are what the callers treat as the tag */
  XOR(s->x3, K1);
  XOR(s->x4, K2);
  printstate("finalization", s);
}
/* make sure __forceinline is supported: MSVC provides it as a built-in
 * keyword, which #ifndef cannot see, so the GCC/Clang attribute fallback is
 * only installed when not compiling with MSVC and no macro of that name
 * already exists */
#if !defined(_MSC_VER) && !defined(__forceinline)
#define __forceinline inline __attribute__((always_inline))
#endif
-0,0 +1,32 @@ +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + state_t s; + (void)nsec; + /* set plaintext size */ + *mlen = clen - CRYPTO_ABYTES; + /* ascon decryption */ + ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); + /* verify tag (should be constant time, check compiler output) */ + XOR(s.x3, LOAD64(c + *mlen)); + XOR(s.x4, LOAD64(c + *mlen + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/encrypt.c new file mode 100644 index 0000000..641b266 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/encrypt.c @@ -0,0 +1,25 @@ +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + /* set ciphertext size */ + *clen = mlen + CRYPTO_ABYTES; + /* ascon encryption */ + ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); + /* set tag */ + STORE64(c + mlen, s.x3); + STORE64(c + mlen + 8, s.x4); + return 0; +} diff --git 
a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/endian.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/implementors b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/loadstore.h 
b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline 
word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/permutations.c b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/permutations.c new file mode 100644 index 0000000..1bca2ef --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if 
defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/permutations.h new file mode 100644 index 0000000..ef338f1 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/permutations.h @@ -0,0 +1,189 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 
/* Index of the first round-constant table entry to use when running the last
 * n of the 12 rounds. The argument is parenthesized so that expression
 * arguments (e.g. START(a + b)) expand with the intended precedence. */
#define START(n) (12 - (n))
+__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/printstate.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/process.c 
b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/process.c new file mode 100644 index 0000000..81fdd15 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/process.c @@ -0,0 +1,31 @@ +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode) { + word_t tmp0; + int n = 0; + while (len) { + /* determine block size */ + n = len < ASCON_RATE ? len : ASCON_RATE; + /* absorb data */ + tmp0 = LOAD(in, n); + XOR(s->x0, tmp0); + /* extract data */ + if (mode != ASCON_AD) STORE(out, s->x0, n); + /* insert data */ + if (mode == ASCON_DEC) { + AND(s->x0, XMASK(n)); + XOR(s->x0, tmp0); + } + /* compute permutation for full blocks */ + if (n == ASCON_RATE) PB(s); + in += n; + out += n; + len -= n; + } + XOR(s->x0, PAD(n % 8)); +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/round.h b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/round.h new file mode 100644 index 0000000..d8ea3b6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/round.h @@ -0,0 +1,85 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + state_t t; + /* round constant */ + s->x2.e ^= C_e; + s->x2.o ^= C_o; + /* s-box layer */ + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + s->x4.e ^= s->x3.e; + s->x4.o ^= s->x3.o; + s->x2.e ^= s->x1.e; + s->x2.o ^= s->x1.o; + t.x0.e = s->x0.e; + t.x0.o = s->x0.o; + t.x4.e = s->x4.e; + t.x4.o = s->x4.o; + t.x3.e = s->x3.e; + t.x3.o = s->x3.o; + t.x1.e 
= s->x1.e; + t.x1.o = s->x1.o; + t.x2.e = s->x2.e; + t.x2.o = s->x2.o; + s->x0.e = t.x0.e ^ (~t.x1.e & t.x2.e); + s->x0.o = t.x0.o ^ (~t.x1.o & t.x2.o); + s->x2.e = t.x2.e ^ (~t.x3.e & t.x4.e); + s->x2.o = t.x2.o ^ (~t.x3.o & t.x4.o); + s->x4.e = t.x4.e ^ (~t.x0.e & t.x1.e); + s->x4.o = t.x4.o ^ (~t.x0.o & t.x1.o); + s->x1.e = t.x1.e ^ (~t.x2.e & t.x3.e); + s->x1.o = t.x1.o ^ (~t.x2.o & t.x3.o); + s->x3.e = t.x3.e ^ (~t.x4.e & t.x0.e); + s->x3.o = t.x3.o ^ (~t.x4.o & t.x0.o); + s->x1.e ^= s->x0.e; + s->x1.o ^= s->x0.o; + s->x3.e ^= s->x2.e; + s->x3.o ^= s->x2.o; + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + /* linear layer */ + t.x0.e = s->x0.e ^ ROR32(s->x0.o, 4); + t.x0.o = s->x0.o ^ ROR32(s->x0.e, 5); + t.x1.e = s->x1.e ^ ROR32(s->x1.e, 11); + t.x1.o = s->x1.o ^ ROR32(s->x1.o, 11); + t.x2.e = s->x2.e ^ ROR32(s->x2.o, 2); + t.x2.o = s->x2.o ^ ROR32(s->x2.e, 3); + t.x3.e = s->x3.e ^ ROR32(s->x3.o, 3); + t.x3.o = s->x3.o ^ ROR32(s->x3.e, 4); + t.x4.e = s->x4.e ^ ROR32(s->x4.e, 17); + t.x4.o = s->x4.o ^ ROR32(s->x4.o, 17); + s->x0.e ^= ROR32(t.x0.o, 9); + s->x0.o ^= ROR32(t.x0.e, 10); + s->x1.e ^= ROR32(t.x1.o, 19); + s->x1.o ^= ROR32(t.x1.e, 20); + s->x2.e ^= t.x2.o; + s->x2.o ^= ROR32(t.x2.e, 1); + s->x3.e ^= ROR32(t.x3.e, 5); + s->x3.o ^= ROR32(t.x3.o, 5); + s->x4.e ^= ROR32(t.x4.o, 3); + s->x4.o ^= ROR32(t.x4.e, 4); + s->x2.e = ~s->x2.e; + s->x2.o = ~s->x2.o; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/word.c b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/word.c new file mode 100644 index 0000000..0ac4e63 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/bi32_lowsize/word.c @@ -0,0 +1,36 @@ +#include "word.h" + +/* credit to Henry S. 
/* One delta-swap step: exchange the bit groups of v selected by mask with the
 * groups located `shift` positions above them. */
static uint32_t bi32_delta_swap(uint32_t v, uint32_t mask, int shift) {
  const uint32_t diff = (v ^ (v >> shift)) & mask;
  return v ^ diff ^ (diff << shift);
}

/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */
/* Convert a value whose low 32 bits hold the even-indexed bits and whose high
 * 32 bits hold the odd-indexed bits back to normal bit order. */
uint64_t FROMBI32(uint64_t in) {
  const uint32_t low_half = (uint32_t)in;
  const uint32_t high_half = (uint32_t)(in >> 32);
  /* regroup the 16-bit pieces so each output half can be shuffled alone */
  uint32_t lo = (low_half & 0x0000FFFF) | (high_half << 16);
  uint32_t hi = (low_half >> 16) | (high_half & 0xFFFF0000);
  lo = bi32_delta_swap(lo, 0x0000FF00, 8);
  lo = bi32_delta_swap(lo, 0x00F000F0, 4);
  lo = bi32_delta_swap(lo, 0x0C0C0C0C, 2);
  lo = bi32_delta_swap(lo, 0x22222222, 1);
  hi = bi32_delta_swap(hi, 0x0000FF00, 8);
  hi = bi32_delta_swap(hi, 0x00F000F0, 4);
  hi = bi32_delta_swap(hi, 0x0C0C0C0C, 2);
  hi = bi32_delta_swap(hi, 0x22222222, 1);
  return (uint64_t)hi << 32 | lo;
}
(word_t){.o = x >> 32, .e = x}; +} + +__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } + +uint64_t TOBI32(uint64_t in); + +uint64_t FROMBI32(uint64_t in); + +__forceinline word_t U64TOWORD(uint64_t x) { + uint64_t w = TOBI32(x); + return (word_t){.o = w >> 32, .e = w}; +} + +__forceinline uint64_t WORDTOU64(word_t w) { + return FROMBI32((uint64_t)w.o << 32 | w.e); +} + +#define XOR(a, b) \ + do { \ + word_t tb = b; \ + (a).e ^= tb.e; \ + (a).o ^= tb.o; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t tb = b; \ + (a).e &= tb.e; \ + (a).o &= tb.o; \ + } while (0) + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); +} + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return WORD_T((uint64_t)mask << 32 | mask); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/api.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/api.h index 4b53d6c..5fa0140 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/api.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/api.h @@ -3,3 +3,4 @@ #define CRYPTO_NPUBBYTES 16 #define CRYPTO_ABYTES 16 #define 
CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/ascon.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/ascon.c new file mode 100644 index 0000000..9011a77 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/ascon.c @@ -0,0 +1,250 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +__forceinline void loadkey(word_t* K0, word_t* K1, word_t* K2, + const uint8_t* k) { + KINIT(K0, K1, K2); + if (CRYPTO_KEYBYTES == 20) { + XOR(*K0, KEYROT(WORD_T(0), LOAD(k, 4))); + k += 4; + } + XOR(*K1, LOAD64(k)); + XOR(*K2, LOAD64(k + 8)); +} + +__forceinline void init(state_t* s, const uint8_t* npub, const uint8_t* k) { + word_t N0, N1; + word_t K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* initialization */ + PINIT(s); + XOR(s->x0, IV); + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + XOR(s->x1, K1); + XOR(s->x2, K2); + XOR(s->x3, N0); + XOR(s->x4, N1); + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); +} + +__forceinline void absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + word_t* restrict px; + /* process associated data */ + if (adlen) { + while (adlen >= ASCON_RATE) { + XOR(s->x0, LOAD64(ad)); + if (ASCON_RATE == 16) XOR(s->x1, LOAD64(ad + 8)); + PB(s); + ad += ASCON_RATE; + adlen -= ASCON_RATE; + } + /* final associated data block */ + px = &s->x0; + if (ASCON_RATE == 16 && adlen >= 8) { + XOR(s->x0, LOAD64(ad)); + px = &s->x1; + ad += 8; + adlen -= 8; + } + if (adlen) XOR(*px, LOAD(ad, adlen)); + XOR(*px, PAD(adlen)); + PB(s); + } + XOR(s->x4, WORD_T(1)); + printstate("process associated data", s); +} + +__forceinline void encrypt(state_t* s, uint8_t* c, const uint8_t* m, + uint64_t mlen) { + word_t* restrict px; + /* process plaintext */ + while (mlen >= 
ASCON_RATE) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + if (ASCON_RATE == 16) { + XOR(s->x1, LOAD64(m + 8)); + STORE64(c + 8, s->x1); + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + mlen -= ASCON_RATE; + } + /* final plaintext block */ + px = &s->x0; + if (ASCON_RATE == 16 && mlen >= 8) { + XOR(s->x0, LOAD64(m)); + STORE64(c, s->x0); + px = &s->x1; + m += 8; + c += 8; + mlen -= 8; + } + if (mlen) { + XOR(*px, LOAD(m, mlen)); + STORE(c, *px, mlen); + } + XOR(*px, PAD(mlen)); + printstate("process plaintext", s); +} + +__forceinline void decrypt(state_t* s, uint8_t* m, const uint8_t* c, + uint64_t clen) { + word_t* restrict px; + word_t cx; + /* process ciphertext */ + while (clen >= ASCON_RATE) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + if (ASCON_RATE == 16) { + cx = LOAD64(c + 8); + XOR(s->x1, cx); + STORE64(m + 8, s->x1); + s->x1 = cx; + } + PB(s); + m += ASCON_RATE; + c += ASCON_RATE; + clen -= ASCON_RATE; + } + /* final ciphertext block */ + px = &s->x0; + if (ASCON_RATE == 16 && clen >= 8) { + cx = LOAD64(c); + XOR(s->x0, cx); + STORE64(m, s->x0); + s->x0 = cx; + px = &s->x1; + m += 8; + c += 8; + clen -= 8; + } + if (clen) { + cx = LOAD(c, clen); + XOR(*px, cx); + STORE(m, *px, clen); + AND(*px, XMASK(clen)); + XOR(*px, cx); + } + XOR(*px, PAD(clen)); + printstate("process ciphertext", s); +} + +__forceinline void final(state_t* s, const uint8_t* k) { + word_t K0, K1, K2; + /* load key */ + loadkey(&K0, &K1, &K2, k); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} + +#if !ASCON_INLINE_MODE + +#define INIT ascon_init +#define ABSORB ascon_absorb +#define 
ENCRYPT ascon_encrypt +#define DECRYPT ascon_decrypt +#define FINAL ascon_final + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k) { + init(s, npub, k); +} + +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen) { + absorb(s, ad, adlen); +} + +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen) { + encrypt(s, c, m, mlen); +} + +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen) { + decrypt(s, m, c, clen); +} + +void ascon_final(state_t* s, const uint8_t* k) { final(s, k); } + +#else + +#define INIT init +#define ABSORB absorb +#define ENCRYPT encrypt +#define DECRYPT decrypt +#define FINAL final + +#endif + +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + *clen = mlen + CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + ENCRYPT(&s, c, m, mlen); + FINAL(&s, k); + /* set tag */ + c += mlen; + STORE64(c, s.x3); + STORE64(c + 8, s.x4); + return 0; +} + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + state_t s; + (void)nsec; + if (clen < CRYPTO_ABYTES) { + *mlen = 0; + return -1; + } + *mlen = clen = clen - CRYPTO_ABYTES; + /* perform ascon computation */ + INIT(&s, npub, k); + ABSORB(&s, ad, adlen); + DECRYPT(&s, m, c, clen); + FINAL(&s, k); + /* verify tag (should be constant time, check compiler output) */ + c += clen; + XOR(s.x3, LOAD64(c)); + XOR(s.x4, LOAD64(c + 8)); + if (NOTZERO(s.x3, s.x4)) { + *mlen = 0; + return -1; + } + return 0; +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/ascon.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/ascon.h new file mode 100644 index 0000000..10a5b6e --- /dev/null +++ 
b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/config.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/config.h new file mode 100644 index 0000000..8d8a1a0 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 1 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'H' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/endian.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/endian.h index ffbdcd9..3944360 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/endian.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/endian.h @@ -3,29 +3,37 @@ #if defined(__BYTE_ORDER__) && 
__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) #elif defined(_MSC_VER) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ - (((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ - (((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ - (((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) #define U32BIG(x) \ - ((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ - (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) -#define U16BIG(x) \ - ((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8)) + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ - +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/loadstore.h 
b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t 
LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/permutations.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/permutations.c new file mode 100644 index 0000000..3aecaa6 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/permutations.c @@ -0,0 +1,26 @@ +#include "permutations.h" + +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/permutations.h 
b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/permutations.h index 4af1e2c..6172dd5 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/permutations.h @@ -1,102 +1,184 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -typedef unsigned char u8; -typedef unsigned long long u64; - -typedef struct { - u64 x0, x1, x2, x3, x4; -} state; - -#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) -#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) - -#define ROUND(C) \ - do { \ - state t; \ - s.x2 ^= C; \ - s.x0 ^= s.x4; \ - s.x4 ^= s.x3; \ - s.x2 ^= s.x1; \ - t.x0 = s.x0; \ - t.x4 = s.x4; \ - t.x3 = s.x3; \ - t.x1 = s.x1; \ - t.x2 = s.x2; \ - s.x0 = t.x0 ^ ((~t.x1) & t.x2); \ - s.x2 = t.x2 ^ ((~t.x3) & t.x4); \ - s.x4 = t.x4 ^ ((~t.x0) & t.x1); \ - s.x1 = t.x1 ^ ((~t.x2) & t.x3); \ - s.x3 = t.x3 ^ ((~t.x4) & t.x0); \ - s.x1 ^= s.x0; \ - t.x1 = s.x1; \ - s.x1 = ROTR64(s.x1, 39); \ - s.x3 ^= s.x2; \ - t.x2 = s.x2; \ - s.x2 = ROTR64(s.x2, 1); \ - t.x4 = s.x4; \ - t.x2 ^= s.x2; \ - s.x2 = ROTR64(s.x2, 6 - 1); \ - t.x3 = s.x3; \ - t.x1 ^= s.x1; \ - s.x3 = ROTR64(s.x3, 10); \ - s.x0 ^= s.x4; \ - s.x4 = ROTR64(s.x4, 7); \ - t.x3 ^= s.x3; \ - s.x2 ^= t.x2; \ - s.x1 = ROTR64(s.x1, 61 - 39); \ - t.x0 = s.x0; \ - s.x2 = ~s.x2; \ - s.x3 = ROTR64(s.x3, 17 - 10); \ - t.x4 ^= s.x4; \ - s.x4 = ROTR64(s.x4, 41 - 7); \ - s.x3 ^= t.x3; \ - s.x1 ^= t.x1; \ - s.x0 = ROTR64(s.x0, 19); \ - s.x4 ^= t.x4; \ - t.x0 ^= s.x0; \ - s.x0 = ROTR64(s.x0, 28 - 19); \ - s.x0 ^= t.x0; \ - } while (0) - -#define P12() \ - do { \ - ROUND(0xf0); \ - ROUND(0xe1); \ - ROUND(0xd2); \ - ROUND(0xc3); \ - ROUND(0xb4); \ - ROUND(0xa5); \ - ROUND(0x96); \ - ROUND(0x87); \ - ROUND(0x78); \ - ROUND(0x69); \ - ROUND(0x5a); \ - ROUND(0x4b); \ - } while (0) - -#define P8() \ - do { \ - ROUND(0xb4); \ - ROUND(0xa5); \ - ROUND(0x96); \ - ROUND(0x87); \ - 
ROUND(0x78); \ - ROUND(0x69); \ - ROUND(0x5a); \ - ROUND(0x4b); \ - } while (0) - -#define P6() \ - do { \ - ROUND(0x96); \ - ROUND(0x87); \ - ROUND(0x78); \ - ROUND(0x69); \ - ROUND(0x5a); \ - ROUND(0x4b); \ - } while (0) - -#endif // PERMUTATIONS_H_ +#include +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define 
ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if 
(i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/printstate.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/round.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/round.h new file mode 100644 index 0000000..077cbfd --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/round.h @@ -0,0 +1,69 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void 
ROUND(state_t* s, uint64_t C) { + state_t t; + s->x2 ^= C; + s->x0 ^= s->x4; + s->x4 ^= s->x3; + s->x2 ^= s->x1; + t.x0 = s->x0; + t.x4 = s->x4; + t.x3 = s->x3; + t.x1 = s->x1; + t.x2 = s->x2; + s->x0 = t.x0 ^ (~t.x1 & t.x2); + s->x2 = t.x2 ^ (~t.x3 & t.x4); + s->x4 = t.x4 ^ (~t.x0 & t.x1); + s->x1 = t.x1 ^ (~t.x2 & t.x3); + s->x3 = t.x3 ^ (~t.x4 & t.x0); + s->x1 ^= s->x0; + t.x1 = s->x1; + s->x1 = ROR64(s->x1, 39); + s->x3 ^= s->x2; + t.x2 = s->x2; + s->x2 = ROR64(s->x2, 1); + t.x4 = s->x4; + t.x2 ^= s->x2; + s->x2 = ROR64(s->x2, 6 - 1); + t.x3 = s->x3; + t.x1 ^= s->x1; + s->x3 = ROR64(s->x3, 10); + s->x0 ^= s->x4; + s->x4 = ROR64(s->x4, 7); + t.x3 ^= s->x3; + s->x2 ^= t.x2; + s->x1 = ROR64(s->x1, 61 - 39); + t.x0 = s->x0; + s->x2 = ~s->x2; + s->x3 = ROR64(s->x3, 17 - 10); + t.x4 ^= s->x4; + s->x4 = ROR64(s->x4, 41 - 7); + s->x3 ^= t.x3; + s->x1 ^= t.x1; + s->x0 = ROR64(s->x0, 19); + s->x4 ^= t.x4; + t.x0 ^= s->x0; + s->x0 = ROR64(s->x0, 28 - 19); + s->x0 ^= t.x0; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/word.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/word.h new file mode 100644 index 0000000..5d601bb --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64/word.h @@ -0,0 +1,47 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef uint64_t word_t; + +#define WORD_T +#define UINT64_T + +#define U64TOWORD +#define WORDTOU64 + +#define XOR(a, b) \ + do { \ + (a) ^= (b); \ + } while (0) + +#define AND(a, b) \ + do { \ + (a) &= (b); \ + } while (0) + +__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + return lo2hi << 32 | hi2lo >> 32; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return 
result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/api.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/api.h index 4b53d6c..5fa0140 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/api.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/api.h @@ -3,3 +3,4 @@ #define CRYPTO_NPUBBYTES 16 #define CRYPTO_ABYTES 16 #define CRYPTO_NOOVERLAP 1 +#define ASCON_RATE 8 diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/ascon.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/ascon.c new file mode 100644 index 0000000..5e60f1a --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/ascon.c @@ -0,0 +1,63 @@ +#include "ascon.h" + +#include "api.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode) { + word_t N0, N1, K0, K1, K2; + /* load nonce */ + N0 = LOAD64(npub); + N1 = LOAD64(npub + 8); + /* load key */ + if (CRYPTO_KEYBYTES == 20) { + K0 = KEYROT(WORD_T(0), LOAD(k, 4)); + k += 4; + } + K1 = LOAD64(k); + K2 = LOAD64(k + 8); + /* initialization */ + s->x0 = IV; + if (CRYPTO_KEYBYTES == 20) XOR(s->x0, K0); + s->x1 = K1; + s->x2 = K2; + s->x3 = N0; + s->x4 = N1; + P12(s); + if (CRYPTO_KEYBYTES == 20) XOR(s->x2, K0); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("initialization", s); + /* process associated data */ + if (adlen) { + 
process_data(s, (void*)0, ad, adlen, ASCON_AD); + PB(s); + } + XOR(s->x4, WORD_T(1)); + /* process plaintext/ciphertext */ + process_data(s, out, in, tlen, mode); + /* finalization */ + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 8) { + XOR(s->x1, K1); + XOR(s->x2, K2); + } + if (CRYPTO_KEYBYTES == 16 && ASCON_RATE == 16) { + XOR(s->x2, K1); + XOR(s->x3, K2); + } + if (CRYPTO_KEYBYTES == 20) { + XOR(s->x1, KEYROT(K0, K1)); + XOR(s->x2, KEYROT(K1, K2)); + XOR(s->x3, KEYROT(K2, WORD_T(0))); + } + P12(s); + XOR(s->x3, K1); + XOR(s->x4, K2); + printstate("finalization", s); +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/ascon.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/ascon.h new file mode 100644 index 0000000..aa685d3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/ascon.h @@ -0,0 +1,24 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +#define ASCON_AD 0 +#define ASCON_ENC 1 +#define ASCON_DEC 2 + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/config.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/config.h new file mode 100644 index 0000000..ec8bd6f --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef 
ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'H' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/decrypt.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/decrypt.c index 0cde81e..605bd03 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/decrypt.c +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/decrypt.c @@ -1,29 +1,32 @@ -#include "core.h" +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" -int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, - unsigned char* nsec, const unsigned char* c, - unsigned long long clen, const unsigned char* ad, - unsigned long long adlen, const unsigned char* npub, - const unsigned char* k) { +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); + +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { if (clen < CRYPTO_ABYTES) { *mlen = 0; return -1; } - - state s; + state_t s; (void)nsec; - - // set plaintext size + /* set plaintext size */ *mlen = clen - CRYPTO_ABYTES; - + /* ascon decryption */ ascon_core(&s, m, c, *mlen, ad, adlen, npub, k, ASCON_DEC); - - // verify tag (should be constant time, check compiler output) - if (((s.x3 ^ U64BIG(*(u64*)(c + *mlen))) | - (s.x4 ^ U64BIG(*(u64*)(c + *mlen + 8)))) != 0) { + /* 
verify tag (should be constant time, check compiler output) */ + XOR(s.x3, LOAD64(c + *mlen)); + XOR(s.x4, LOAD64(c + *mlen + 8)); + if (NOTZERO(s.x3, s.x4)) { *mlen = 0; return -1; } - return 0; } diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/encrypt.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/encrypt.c index 5961c60..641b266 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/encrypt.c +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/encrypt.c @@ -1,21 +1,25 @@ -#include "core.h" +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" -int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, - const unsigned char* m, unsigned long long mlen, - const unsigned char* ad, unsigned long long adlen, - const unsigned char* nsec, const unsigned char* npub, - const unsigned char* k) { - state s; - (void)nsec; +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); - // set ciphertext size +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + state_t s; + (void)nsec; + /* set ciphertext size */ *clen = mlen + CRYPTO_ABYTES; - + /* ascon encryption */ ascon_core(&s, c, m, mlen, ad, adlen, npub, k, ASCON_ENC); - - // set tag - *(u64*)(c + mlen) = U64BIG(s.x3); - *(u64*)(c + mlen + 8) = U64BIG(s.x4); - + /* set tag */ + STORE64(c + mlen, s.x3); + STORE64(c + mlen + 8, s.x4); return 0; } diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/endian.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/endian.h index b4d18f5..3944360 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/endian.h +++ 
b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/endian.h @@ -3,7 +3,10 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) @@ -11,19 +14,26 @@ #elif defined(_MSC_VER) || \ (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x)&0x00000000000000FFULL) << 56) | (((x)&0x000000000000FF00ULL) << 40) | \ - (((x)&0x0000000000FF0000ULL) << 24) | (((x)&0x00000000FF000000ULL) << 8) | \ - (((x)&0x000000FF00000000ULL) >> 8) | (((x)&0x0000FF0000000000ULL) >> 24) | \ - (((x)&0x00FF000000000000ULL) >> 40) | (((x)&0xFF00000000000000ULL) >> 56)) -#define U32BIG(x) \ - ((((x)&0x000000FF) << 24) | (((x)&0x0000FF00) << 8) | \ - (((x)&0x00FF0000) >> 8) | (((x)&0xFF000000) >> 24)) -#define U16BIG(x) ((((x)&0x00FF) << 8) | (((x)&0xFF00) >> 8)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ +#endif /* ENDIAN_H_ */ diff --git 
a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/loadstore.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) 
LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.c index 9aaf9d1..3aecaa6 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.c +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.c @@ -1,8 +1,26 @@ #include "permutations.h" -void P(state *p, u8 rounds) { - state s = *p; - u8 i, start = START_CONSTANT(rounds); - for (i = start; i > 0x4a; i -= 0x0f) ROUND(i); - *p = s; +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); } + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } 
+ +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.h index 7143e82..6172dd5 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/permutations.h @@ -1,66 +1,184 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -typedef unsigned char u8; -typedef unsigned long long u64; - -typedef struct { - u64 x0, x1, x2, x3, x4; -} state; - -#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) -#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) -#define START_CONSTANT(x) (((0xf - (12 - (x))) << 4) | (12 - (x))) - -#define ROUND(C) \ - do { \ - state t; \ - s.x2 ^= C; \ - s.x0 ^= s.x4; \ - s.x4 ^= s.x3; \ - s.x2 ^= s.x1; \ - t.x0 = s.x0; \ - t.x4 = s.x4; \ - t.x3 = s.x3; \ - t.x1 = s.x1; \ - t.x2 = s.x2; \ - s.x0 = t.x0 ^ ((~t.x1) & t.x2); \ - s.x2 = t.x2 ^ ((~t.x3) & t.x4); \ - s.x4 = t.x4 ^ ((~t.x0) & t.x1); \ - s.x1 = t.x1 ^ ((~t.x2) & t.x3); \ - s.x3 = t.x3 ^ ((~t.x4) & t.x0); \ - s.x1 ^= s.x0; \ - t.x1 = s.x1; \ - s.x1 = ROTR64(s.x1, 39); \ - s.x3 ^= s.x2; \ - t.x2 = s.x2; \ - s.x2 = ROTR64(s.x2, 1); \ - t.x4 = s.x4; \ - t.x2 ^= s.x2; \ - s.x2 = ROTR64(s.x2, 6 - 1); \ - t.x3 = s.x3; \ - t.x1 ^= s.x1; \ - s.x3 = ROTR64(s.x3, 10); \ - s.x0 ^= s.x4; \ - s.x4 = ROTR64(s.x4, 7); \ - t.x3 ^= s.x3; \ - s.x2 ^= t.x2; \ - s.x1 = ROTR64(s.x1, 61 - 39); \ - t.x0 = s.x0; \ - s.x2 = ~s.x2; \ - s.x3 = ROTR64(s.x3, 17 - 10); \ - t.x4 ^= s.x4; \ - s.x4 = ROTR64(s.x4, 41 - 7); \ - s.x3 ^= t.x3; \ - s.x1 ^= t.x1; \ - s.x0 = ROTR64(s.x0, 19); \ - s.x4 ^= t.x4; \ - t.x0 ^= s.x0; \ - s.x0 = ROTR64(s.x0, 28 - 19); \ 
- s.x0 ^= t.x0; \ - } while (0) - -void P(state *p, u8 rounds); - -#endif // PERMUTATIONS_H_ +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + 
+#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, 
uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/printstate.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/process.c b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/process.c new file mode 100644 index 0000000..81fdd15 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/process.c @@ -0,0 +1,31 @@ +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode) { + word_t tmp0; + int n = 0; + while (len) { + /* determine block size */ + n = len < ASCON_RATE ? 
len : ASCON_RATE; + /* absorb data */ + tmp0 = LOAD(in, n); + XOR(s->x0, tmp0); + /* extract data */ + if (mode != ASCON_AD) STORE(out, s->x0, n); + /* insert data */ + if (mode == ASCON_DEC) { + AND(s->x0, XMASK(n)); + XOR(s->x0, tmp0); + } + /* compute permutation for full blocks */ + if (n == ASCON_RATE) PB(s); + in += n; + out += n; + len -= n; + } + XOR(s->x0, PAD(n % 8)); +} diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/round.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/round.h new file mode 100644 index 0000000..077cbfd --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/round.h @@ -0,0 +1,69 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint64_t C) { + state_t t; + s->x2 ^= C; + s->x0 ^= s->x4; + s->x4 ^= s->x3; + s->x2 ^= s->x1; + t.x0 = s->x0; + t.x4 = s->x4; + t.x3 = s->x3; + t.x1 = s->x1; + t.x2 = s->x2; + s->x0 = t.x0 ^ (~t.x1 & t.x2); + s->x2 = t.x2 ^ (~t.x3 & t.x4); + s->x4 = t.x4 ^ (~t.x0 & t.x1); + s->x1 = t.x1 ^ (~t.x2 & t.x3); + s->x3 = t.x3 ^ (~t.x4 & t.x0); + s->x1 ^= s->x0; + t.x1 = s->x1; + s->x1 = ROR64(s->x1, 39); + s->x3 ^= s->x2; + t.x2 = s->x2; + s->x2 = ROR64(s->x2, 1); + t.x4 = s->x4; + t.x2 ^= s->x2; + s->x2 = ROR64(s->x2, 6 - 1); + t.x3 = s->x3; + t.x1 ^= s->x1; + s->x3 = ROR64(s->x3, 10); + s->x0 ^= s->x4; + s->x4 = ROR64(s->x4, 7); + t.x3 ^= s->x3; + s->x2 ^= t.x2; + s->x1 = ROR64(s->x1, 61 - 39); + t.x0 = s->x0; + s->x2 = ~s->x2; + s->x3 = ROR64(s->x3, 17 - 10); + t.x4 ^= s->x4; + s->x4 = ROR64(s->x4, 41 - 7); + s->x3 ^= t.x3; + s->x1 ^= t.x1; + s->x0 = ROR64(s->x0, 19); + s->x4 ^= t.x4; + t.x0 ^= s->x0; + 
s->x0 = ROR64(s->x0, 28 - 19); + s->x0 ^= t.x0; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/word.h b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/word.h new file mode 100644 index 0000000..5d601bb --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/opt64_lowsize/word.h @@ -0,0 +1,47 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef uint64_t word_t; + +#define WORD_T +#define UINT64_T + +#define U64TOWORD +#define WORDTOU64 + +#define XOR(a, b) \ + do { \ + (a) ^= (b); \ + } while (0) + +#define AND(a, b) \ + do { \ + (a) &= (b); \ + } while (0) + +__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + return lo2hi << 32 | hi2lo >> 32; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/ascon.h b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/ascon.h new file mode 100644 index 0000000..8ab0502 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/ascon.h @@ -0,0 +1,14 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#define WORDTOU64 + +typedef uint64_t word_t; + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/decrypt.c b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/decrypt.c index 
dcdebf7..e0881ac 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/decrypt.c +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/decrypt.c @@ -1,97 +1,93 @@ #include "api.h" +#include "ascon.h" +#include "loadstore.h" #include "permutations.h" +#include "printstate.h" -#define RATE (64 / 8) -#define PA_ROUNDS 12 -#define PB_ROUNDS 6 -#define IV \ - ((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \ - (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32) +int crypto_aead_decrypt(uint8_t* m, uint64_t* mlen, uint8_t* nsec, + const uint8_t* c, uint64_t clen, const uint8_t* ad, + uint64_t adlen, const uint8_t* npub, const uint8_t* k) { + uint64_t K0, K1, K2, N0, N1; + state_t s; + (void)nsec; -int crypto_aead_decrypt(unsigned char* m, unsigned long long* mlen, - unsigned char* nsec, const unsigned char* c, - unsigned long long clen, const unsigned char* ad, - unsigned long long adlen, const unsigned char* npub, - const unsigned char* k) { if (clen < CRYPTO_ABYTES) { *mlen = 0; return -1; } - const u64 K0 = BYTES_TO_U64(k, 8) >> 32; - const u64 K1 = BYTES_TO_U64(k + 4, 8); - const u64 K2 = BYTES_TO_U64(k + 12, 8); - const u64 N0 = BYTES_TO_U64(npub, 8); - const u64 N1 = BYTES_TO_U64(npub + 8, 8); - state s; - u64 c0; - (void)nsec; - - // set plaintext size + /* set plaintext size */ *mlen = clen - CRYPTO_ABYTES; - // initialization - s.x0 = IV | K0; + /* load key and nonce */ + K0 = LOAD(k + 0, 4) >> 32; + K1 = LOAD(k + 4, 8); + K2 = LOAD(k + 12, 8); + N0 = LOAD(npub, 8); + N1 = LOAD(npub + 8, 8); + + /* initialization */ + s.x0 = ASCON_80PQ_IV | K0; s.x1 = K1; s.x2 = K2; s.x3 = N0; s.x4 = N1; - printstate("initial value:", s); P12(&s); s.x2 ^= K0; s.x3 ^= K1; s.x4 ^= K2; - printstate("initialization:", s); + printstate("initialization", &s); - // process associated data + /* process associated data */ if (adlen) { - while (adlen >= RATE) { - s.x0 ^= BYTES_TO_U64(ad, 8); + while (adlen >= ASCON_128_RATE) { + s.x0 ^= LOAD(ad, 8); P6(&s); - 
adlen -= RATE; - ad += RATE; + ad += ASCON_128_RATE; + adlen -= ASCON_128_RATE; } - s.x0 ^= BYTES_TO_U64(ad, adlen); - s.x0 ^= 0x80ull << (56 - 8 * adlen); + /* final associated data block */ + s.x0 ^= LOAD(ad, adlen); + s.x0 ^= PAD(adlen); P6(&s); } s.x4 ^= 1; - printstate("process associated data:", s); + printstate("process associated data", &s); - // process plaintext + /* process ciphertext */ clen -= CRYPTO_ABYTES; - while (clen >= RATE) { - c0 = BYTES_TO_U64(c, 8); - U64_TO_BYTES(m, s.x0 ^ c0, 8); + while (clen >= ASCON_128_RATE) { + uint64_t c0 = LOAD(c, 8); + STORE(m, s.x0 ^ c0, 8); s.x0 = c0; P6(&s); - clen -= RATE; - m += RATE; - c += RATE; + m += ASCON_128_RATE; + c += ASCON_128_RATE; + clen -= ASCON_128_RATE; } - c0 = BYTES_TO_U64(c, clen); - U64_TO_BYTES(m, s.x0 ^ c0, clen); - s.x0 &= ~BYTE_MASK(clen); + /* final ciphertext block */ + uint64_t c0 = LOAD(c, clen); + STORE(m, s.x0 ^ c0, clen); + s.x0 &= ~MASK(clen); s.x0 |= c0; - s.x0 ^= 0x80ull << (56 - 8 * clen); + s.x0 ^= PAD(clen); c += clen; - printstate("process plaintext:", s); + printstate("process ciphertext", &s); - // finalization + /* finalization */ s.x1 ^= K0 << 32 | K1 >> 32; s.x2 ^= K1 << 32 | K2 >> 32; s.x3 ^= K2 << 32; P12(&s); s.x3 ^= K1; s.x4 ^= K2; - printstate("finalization:", s); + printstate("finalization", &s); - // verify tag (should be constant time, check compiler output) - if (((s.x3 ^ BYTES_TO_U64(c, 8)) | (s.x4 ^ BYTES_TO_U64(c + 8, 8))) != 0) { + /* verify tag (should be constant time, check compiler output) */ + if ((s.x3 ^ LOAD(c, 8)) | (s.x4 ^ LOAD(c + 8, 8))) { *mlen = 0; return -1; } return 0; } - diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/encrypt.c b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/encrypt.c index 0261896..4d7f9ac 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/encrypt.c +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/encrypt.c @@ -1,85 +1,83 @@ #include "api.h" +#include "ascon.h" +#include 
"loadstore.h" #include "permutations.h" +#include "printstate.h" -#define RATE (64 / 8) -#define PA_ROUNDS 12 -#define PB_ROUNDS 6 -#define IV \ - ((u64)(8 * (CRYPTO_KEYBYTES)) << 56 | (u64)(8 * (RATE)) << 48 | \ - (u64)(PA_ROUNDS) << 40 | (u64)(PB_ROUNDS) << 32) - -int crypto_aead_encrypt(unsigned char* c, unsigned long long* clen, - const unsigned char* m, unsigned long long mlen, - const unsigned char* ad, unsigned long long adlen, - const unsigned char* nsec, const unsigned char* npub, - const unsigned char* k) { - const u64 K0 = BYTES_TO_U64(k, 8) >> 32; - const u64 K1 = BYTES_TO_U64(k + 4, 8); - const u64 K2 = BYTES_TO_U64(k + 12, 8); - const u64 N0 = BYTES_TO_U64(npub, 8); - const u64 N1 = BYTES_TO_U64(npub + 8, 8); - state s; +int crypto_aead_encrypt(uint8_t* c, uint64_t* clen, const uint8_t* m, + uint64_t mlen, const uint8_t* ad, uint64_t adlen, + const uint8_t* nsec, const uint8_t* npub, + const uint8_t* k) { + uint64_t K0, K1, K2, N0, N1; + state_t s; (void)nsec; - // set ciphertext size + /* set ciphertext size */ *clen = mlen + CRYPTO_ABYTES; - // initialization - s.x0 = IV | K0; + /* load key and nonce */ + K0 = LOAD(k + 0, 4) >> 32; + K1 = LOAD(k + 4, 8); + K2 = LOAD(k + 12, 8); + N0 = LOAD(npub, 8); + N1 = LOAD(npub + 8, 8); + + /* initialization */ + s.x0 = ASCON_80PQ_IV | K0; s.x1 = K1; s.x2 = K2; s.x3 = N0; s.x4 = N1; - printstate("initial value:", s); P12(&s); s.x2 ^= K0; s.x3 ^= K1; s.x4 ^= K2; - printstate("initialization:", s); + printstate("initialization", &s); - // process associated data + /* process associated data */ if (adlen) { - while (adlen >= RATE) { - s.x0 ^= BYTES_TO_U64(ad, 8); + while (adlen >= ASCON_128_RATE) { + s.x0 ^= LOAD(ad, 8); P6(&s); - adlen -= RATE; - ad += RATE; + ad += ASCON_128_RATE; + adlen -= ASCON_128_RATE; } - s.x0 ^= BYTES_TO_U64(ad, adlen); - s.x0 ^= 0x80ull << (56 - 8 * adlen); + /* final associated data block */ + s.x0 ^= LOAD(ad, adlen); + s.x0 ^= PAD(adlen); P6(&s); } s.x4 ^= 1; - printstate("process 
associated data:", s); + printstate("process associated data", &s); - // process plaintext - while (mlen >= RATE) { - s.x0 ^= BYTES_TO_U64(m, 8); - U64_TO_BYTES(c, s.x0, 8); + /* process plaintext */ + while (mlen >= ASCON_128_RATE) { + s.x0 ^= LOAD(m, 8); + STORE(c, s.x0, 8); P6(&s); - mlen -= RATE; - m += RATE; - c += RATE; + m += ASCON_128_RATE; + c += ASCON_128_RATE; + mlen -= ASCON_128_RATE; } - s.x0 ^= BYTES_TO_U64(m, mlen); - s.x0 ^= 0x80ull << (56 - 8 * mlen); - U64_TO_BYTES(c, s.x0, mlen); + /* final plaintext block */ + s.x0 ^= LOAD(m, mlen); + STORE(c, s.x0, mlen); + s.x0 ^= PAD(mlen); c += mlen; - printstate("process plaintext:", s); + printstate("process plaintext", &s); - // finalization + /* finalization */ s.x1 ^= K0 << 32 | K1 >> 32; s.x2 ^= K1 << 32 | K2 >> 32; s.x3 ^= K2 << 32; P12(&s); s.x3 ^= K1; s.x4 ^= K2; - printstate("finalization:", s); + printstate("finalization", &s); - // set tag - U64_TO_BYTES(c, s.x3, 8); - U64_TO_BYTES(c + 8, s.x4, 8); + /* set tag */ + STORE(c, s.x3, 8); + STORE(c + 8, s.x4, 8); return 0; } - diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/loadstore.h b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/loadstore.h new file mode 100644 index 0000000..eccf0e3 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/loadstore.h @@ -0,0 +1,31 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +/* set padding byte in Ascon 64-bit word */ +#define PAD(i) SETBYTE(0x80, i) + +static inline uint64_t MASK(int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) x |= SETBYTE(0xff, i); + return x; +} + +static inline uint64_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) x |= SETBYTE(bytes[i], i); + return x; +} + +static inline void STORE(uint8_t* 
bytes, uint64_t x, int n) { + for (int i = 0; i < n; ++i) bytes[i] = GETBYTE(x, i); +} + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/permutations.h b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/permutations.h index 3317a9f..ef6dd68 100644 --- a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/permutations.h +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/permutations.h @@ -1,132 +1,88 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -#ifdef DEBUG -#include -#endif +#include -typedef unsigned char u8; -typedef unsigned long long u64; +#include "ascon.h" +#include "printstate.h" +#include "round.h" -typedef struct { - u64 x0, x1, x2, x3, x4; -} state; +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 -static inline void printstate(const char* text, const state s) { -#ifdef DEBUG - printf("%s\n", text); - printf(" x0=%016llx\n", s.x0); - printf(" x1=%016llx\n", s.x1); - printf(" x2=%016llx\n", s.x2); - printf(" x3=%016llx\n", s.x3); - printf(" x4=%016llx\n", s.x4); -#else - // disable warning about unused parameters - (void)text; - (void)s; -#endif -} +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 -static inline u64 BYTES_TO_U64(const u8* bytes, int n) { - int i; - u64 x = 0; - for (i = 0; i < n; i++) x |= ((u64)bytes[i]) << (56 - 8 * i); - return x; -} +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 -static inline void U64_TO_BYTES(u8* bytes, const u64 x, int n) { - int i; - for (i = 0; i < n; i++) bytes[i] = (u8)(x >> (56 - 8 * i)); -} +#define ASCON_HASH_BYTES 32 -static inline u64 BYTE_MASK(int n) { - int i; - u64 x = 0; - for (i = 0; i < n; i++) x |= 0xffull << (56 - 8 * i); - return x; -} +#define ASCON_128_IV \ + (((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) 
-static inline u64 ROTR64(u64 x, int n) { return (x << (64 - n)) | (x >> n); } +#define ASCON_128A_IV \ + (((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) -static inline void ROUND(u8 C, state* p) { - state s = *p; - state t; - // addition of round constant - s.x2 ^= C; - printstate(" addition of round constant:", s); - // substitution layer - s.x0 ^= s.x4; - s.x4 ^= s.x3; - s.x2 ^= s.x1; - // start of keccak s-box - t.x0 = ~s.x0; - t.x1 = ~s.x1; - t.x2 = ~s.x2; - t.x3 = ~s.x3; - t.x4 = ~s.x4; - t.x0 &= s.x1; - t.x1 &= s.x2; - t.x2 &= s.x3; - t.x3 &= s.x4; - t.x4 &= s.x0; - s.x0 ^= t.x1; - s.x1 ^= t.x2; - s.x2 ^= t.x3; - s.x3 ^= t.x4; - s.x4 ^= t.x0; - // end of keccak s-box - s.x1 ^= s.x0; - s.x0 ^= s.x4; - s.x3 ^= s.x2; - s.x2 = ~s.x2; - printstate(" substitution layer:", s); - // linear diffusion layer - s.x0 ^= ROTR64(s.x0, 19) ^ ROTR64(s.x0, 28); - s.x1 ^= ROTR64(s.x1, 61) ^ ROTR64(s.x1, 39); - s.x2 ^= ROTR64(s.x2, 1) ^ ROTR64(s.x2, 6); - s.x3 ^= ROTR64(s.x3, 10) ^ ROTR64(s.x3, 17); - s.x4 ^= ROTR64(s.x4, 7) ^ ROTR64(s.x4, 41); - printstate(" linear diffusion layer:", s); - *p = s; -} +#define ASCON_80PQ_IV \ + (((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) -static inline void P12(state* s) { - printstate(" permutation input:", *s); - ROUND(0xf0, s); - ROUND(0xe1, s); - ROUND(0xd2, s); - ROUND(0xc3, s); - ROUND(0xb4, s); - ROUND(0xa5, s); - ROUND(0x96, s); - ROUND(0x87, s); - ROUND(0x78, s); - ROUND(0x69, s); - ROUND(0x5a, s); - ROUND(0x4b, s); -} +#define ASCON_HASH_IV \ + (((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) -static inline void P8(state* s) { - printstate(" permutation input:", *s); - ROUND(0xb4, 
s); - ROUND(0xa5, s); - ROUND(0x96, s); - ROUND(0x87, s); - ROUND(0x78, s); - ROUND(0x69, s); - ROUND(0x5a, s); - ROUND(0x4b, s); +#define ASCON_XOF_IV \ + (((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +static inline void P12(state_t* s) { + printstate(" permutation input", s); + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); } -static inline void P6(state* s) { - printstate(" permutation input:", *s); - ROUND(0x96, s); - ROUND(0x87, s); - ROUND(0x78, s); - ROUND(0x69, s); - ROUND(0x5a, s); - ROUND(0x4b, s); +static inline void P8(state_t* s) { + printstate(" permutation input", s); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); } -#endif // PERMUTATIONS_H_ +static inline void P6(state_t* s) { + printstate(" permutation input", s); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/printstate.h b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", 
s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_aead/ascon80pqv12/ref/round.h b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/round.h new file mode 100644 index 0000000..dcb6c81 --- /dev/null +++ b/ascon/Implementations/crypto_aead/ascon80pqv12/ref/round.h @@ -0,0 +1,40 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +static inline uint64_t ROR64(uint64_t x, int n) { + return (x << (64 - n)) | (x >> n); +} + +static inline void ROUND(state_t* s, uint8_t C) { + state_t t; + /* addition of round constant */ + s->x2 ^= C; + /* substitution layer */ + s->x0 ^= s->x4; + s->x4 ^= s->x3; + s->x2 ^= s->x1; + /* start of keccak s-box */ + t.x0 = s->x0 ^ (~s->x1 & s->x2); + t.x1 = s->x1 ^ (~s->x2 & s->x3); + t.x2 = s->x2 ^ (~s->x3 & s->x4); + t.x3 = s->x3 ^ (~s->x4 & s->x0); + t.x4 = s->x4 ^ (~s->x0 & s->x1); + /* end of keccak s-box */ + t.x1 ^= t.x0; + t.x0 ^= t.x4; + t.x3 ^= t.x2; + t.x2 = ~t.x2; + /* printstate(" substitution layer", &t); */ + /* linear diffusion layer */ + s->x0 = t.x0 ^ ROR64(t.x0, 19) ^ ROR64(t.x0, 28); + s->x1 = t.x1 ^ ROR64(t.x1, 61) ^ ROR64(t.x1, 39); + s->x2 = t.x2 ^ ROR64(t.x2, 1) ^ ROR64(t.x2, 6); + s->x3 = t.x3 ^ ROR64(t.x3, 10) ^ ROR64(t.x3, 17); + s->x4 = t.x4 ^ ROR64(t.x4, 7) ^ ROR64(t.x4, 41); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi16/api.h b/ascon/Implementations/crypto_hash/asconhashv12/bi16/api.h index ae8c7f6..ad0325e 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi16/api.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi16/api.h @@ -1 +1,3 @@ #define CRYPTO_BYTES 32 +#define ASCON_RATE 8 +#define ASCON_HASH diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi16/endian.h b/ascon/Implementations/crypto_hash/asconhashv12/bi16/endian.h index ffbdcd9..3944360 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi16/endian.h +++ 
b/ascon/Implementations/crypto_hash/asconhashv12/bi16/endian.h @@ -3,29 +3,37 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) #elif defined(_MSC_VER) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ - (((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ - (((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ - (((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) #define U32BIG(x) \ - ((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ - (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) -#define U16BIG(x) \ - ((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8)) + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ - +#endif /* 
ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi16/hash.c b/ascon/Implementations/crypto_hash/asconhashv12/bi16/hash.c index 615ba99..75b96c2 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi16/hash.c +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi16/hash.c @@ -1,18 +1,10 @@ +#include + #include "api.h" #include "endian.h" -typedef unsigned char u8; -typedef unsigned short u16; -typedef unsigned int u32; -typedef unsigned long long u64; - -#define RATE (64 / 8) -#define PA_ROUNDS 12 - -#define EXT_BYTE16(x, n) ((u8)((u16)(x) >> (8 * (1 - (n))))) -#define INS_BYTE16(x, n) ((u16)(x) << (8 * (1 - (n)))) -#define ROTR16(x, n) (((x) >> (n)) | ((x) << (16 - (n)))) -#define ROTL16(x, n) (((x) << (n)) | ((x) >> (16 - (n)))) +#define ROR16(x, n) (((x) >> (n)) | ((x) << (16 - (n)))) +#define ROL16(x, n) (((x) << (n)) | ((x) >> (16 - (n)))) #define COMPRESS_LONG_16(x) \ do { \ @@ -34,24 +26,24 @@ typedef unsigned long long u64; COMPRESS_LONG_16(var_3); \ } while (0) -#define COMPRESS_BYTE_ARRAY_16(a, var_3, var_2, var_1, var_0) \ - do { \ - COMPRESS_U16(U16BIG(((u16 *)(a))[3]), var_3, var_2, var_1, var_0); \ - COMPRESS_U16(U16BIG(((u16 *)(a))[2]), t1_3, t1_2, t1_1, t1_0); \ - var_0 |= t1_0 << 4; \ - var_1 |= t1_1 << 4; \ - var_2 |= t1_2 << 4; \ - var_3 |= t1_3 << 4; \ - COMPRESS_U16(U16BIG(((u16 *)(a))[1]), t1_3, t1_2, t1_1, t1_0); \ - var_0 |= t1_0 << 8; \ - var_1 |= t1_1 << 8; \ - var_2 |= t1_2 << 8; \ - var_3 |= t1_3 << 8; \ - COMPRESS_U16(U16BIG(((u16 *)(a))[0]), t1_3, t1_2, t1_1, t1_0); \ - var_0 |= t1_0 << 12; \ - var_1 |= t1_1 << 12; \ - var_2 |= t1_2 << 12; \ - var_3 |= t1_3 << 12; \ +#define COMPRESS_BYTE_ARRAY_16(a, var_3, var_2, var_1, var_0) \ + do { \ + COMPRESS_U16(U16BIG(((uint16_t*)(a))[3]), var_3, var_2, var_1, var_0); \ + COMPRESS_U16(U16BIG(((uint16_t*)(a))[2]), t1_3, t1_2, t1_1, t1_0); \ + var_0 |= t1_0 << 4; \ + var_1 |= t1_1 << 4; \ + var_2 |= t1_2 << 4; \ + var_3 |= t1_3 << 4; \ + 
COMPRESS_U16(U16BIG(((uint16_t*)(a))[1]), t1_3, t1_2, t1_1, t1_0); \ + var_0 |= t1_0 << 8; \ + var_1 |= t1_1 << 8; \ + var_2 |= t1_2 << 8; \ + var_3 |= t1_3 << 8; \ + COMPRESS_U16(U16BIG(((uint16_t*)(a))[0]), t1_3, t1_2, t1_1, t1_0); \ + var_0 |= t1_0 << 12; \ + var_1 |= t1_1 << 12; \ + var_2 |= t1_2 << 12; \ + var_3 |= t1_3 << 12; \ } while (0) #define EXPAND_SHORT_16(x) \ @@ -75,28 +67,29 @@ typedef unsigned long long u64; *var = U16BIG(t0_0 | (t0_1 << 1) | (t0_2 << 2) | (t0_3 << 3)); \ } while (0) -#define EXPAND_BYTE_ARRAY_16(a, var_3, var_2, var_1, var_0) \ - do { \ - EXPAND_U16((((u16 *)(a)) + 3), var_3, var_2, var_1, var_0); \ - t1_3 = var_3 >> 4; \ - t1_2 = var_2 >> 4; \ - t1_1 = var_1 >> 4; \ - t1_0 = var_0 >> 4; \ - EXPAND_U16((((u16 *)(a)) + 2), t1_3, t1_2, t1_1, t1_0); \ - t1_3 >>= 4; \ - t1_2 >>= 4; \ - t1_1 >>= 4; \ - t1_0 >>= 4; \ - EXPAND_U16((((u16 *)(a)) + 1), t1_3, t1_2, t1_1, t1_0); \ - t1_3 >>= 4; \ - t1_2 >>= 4; \ - t1_1 >>= 4; \ - t1_0 >>= 4; \ - EXPAND_U16((((u16 *)(a)) + 0), t1_3, t1_2, t1_1, t1_0); \ +#define EXPAND_BYTE_ARRAY_16(a, var_3, var_2, var_1, var_0) \ + do { \ + EXPAND_U16((((uint16_t*)(a)) + 3), var_3, var_2, var_1, var_0); \ + t1_3 = var_3 >> 4; \ + t1_2 = var_2 >> 4; \ + t1_1 = var_1 >> 4; \ + t1_0 = var_0 >> 4; \ + EXPAND_U16((((uint16_t*)(a)) + 2), t1_3, t1_2, t1_1, t1_0); \ + t1_3 >>= 4; \ + t1_2 >>= 4; \ + t1_1 >>= 4; \ + t1_0 >>= 4; \ + EXPAND_U16((((uint16_t*)(a)) + 1), t1_3, t1_2, t1_1, t1_0); \ + t1_3 >>= 4; \ + t1_2 >>= 4; \ + t1_1 >>= 4; \ + t1_0 >>= 4; \ + EXPAND_U16((((uint16_t*)(a)) + 0), t1_3, t1_2, t1_1, t1_0); \ } while (0) -// This way of implementing Ascon's S-box was inpired by personal communication -// with Joan Daemen about implementing the 3-bit chi layer. +/* This way of implementing Ascon's S-box was inpired by personal communication + */ +/* with Joan Daemen about implementing the 3-bit chi layer. 
*/ #define ROUND_16(C_3, C_2, C_1, C_0) \ do { \ /* round constant */ \ @@ -178,62 +171,62 @@ typedef unsigned long long u64; t0_1 = x0_1; \ t0_2 = x0_2; \ t0_3 = x0_3; \ - x0_1 ^= ROTR16(t0_0, 5); \ - x0_2 ^= ROTR16(t0_1, 5); \ - x0_3 ^= ROTR16(t0_2, 5); \ - x0_0 ^= ROTR16(t0_3, 4); \ - x0_0 ^= ROTR16(t0_0, 7); \ - x0_1 ^= ROTR16(t0_1, 7); \ - x0_2 ^= ROTR16(t0_2, 7); \ - x0_3 ^= ROTR16(t0_3, 7); \ + x0_1 ^= ROR16(t0_0, 5); \ + x0_2 ^= ROR16(t0_1, 5); \ + x0_3 ^= ROR16(t0_2, 5); \ + x0_0 ^= ROR16(t0_3, 4); \ + x0_0 ^= ROR16(t0_0, 7); \ + x0_1 ^= ROR16(t0_1, 7); \ + x0_2 ^= ROR16(t0_2, 7); \ + x0_3 ^= ROR16(t0_3, 7); \ t0_0 = x1_0; \ t0_1 = x1_1; \ t0_2 = x1_2; \ t0_3 = x1_3; \ x1_3 ^= t0_0; \ - x1_0 ^= ROTL16(t0_1, 1); \ - x1_1 ^= ROTL16(t0_2, 1); \ - x1_2 ^= ROTL16(t0_3, 1); \ - x1_1 ^= ROTL16(t0_0, 6); \ - x1_2 ^= ROTL16(t0_1, 6); \ - x1_3 ^= ROTL16(t0_2, 6); \ - x1_0 ^= ROTL16(t0_3, 7); \ + x1_0 ^= ROL16(t0_1, 1); \ + x1_1 ^= ROL16(t0_2, 1); \ + x1_2 ^= ROL16(t0_3, 1); \ + x1_1 ^= ROL16(t0_0, 6); \ + x1_2 ^= ROL16(t0_1, 6); \ + x1_3 ^= ROL16(t0_2, 6); \ + x1_0 ^= ROL16(t0_3, 7); \ t0_0 = x2_0; \ t0_1 = x2_1; \ t0_2 = x2_2; \ t0_3 = x2_3; \ - x2_3 ^= ROTR16(t0_0, 1); \ + x2_3 ^= ROR16(t0_0, 1); \ x2_0 ^= t0_1; \ x2_1 ^= t0_2; \ x2_2 ^= t0_3; \ - x2_2 ^= ROTR16(t0_0, 2); \ - x2_3 ^= ROTR16(t0_1, 2); \ - x2_0 ^= ROTR16(t0_2, 1); \ - x2_1 ^= ROTR16(t0_3, 1); \ + x2_2 ^= ROR16(t0_0, 2); \ + x2_3 ^= ROR16(t0_1, 2); \ + x2_0 ^= ROR16(t0_2, 1); \ + x2_1 ^= ROR16(t0_3, 1); \ t0_0 = x3_0; \ t0_1 = x3_1; \ t0_2 = x3_2; \ t0_3 = x3_3; \ - x3_2 ^= ROTR16(t0_0, 3); \ - x3_3 ^= ROTR16(t0_1, 3); \ - x3_0 ^= ROTR16(t0_2, 2); \ - x3_1 ^= ROTR16(t0_3, 2); \ - x3_3 ^= ROTR16(t0_0, 5); \ - x3_0 ^= ROTR16(t0_1, 4); \ - x3_1 ^= ROTR16(t0_2, 4); \ - x3_2 ^= ROTR16(t0_3, 4); \ + x3_2 ^= ROR16(t0_0, 3); \ + x3_3 ^= ROR16(t0_1, 3); \ + x3_0 ^= ROR16(t0_2, 2); \ + x3_1 ^= ROR16(t0_3, 2); \ + x3_3 ^= ROR16(t0_0, 5); \ + x3_0 ^= ROR16(t0_1, 4); \ + x3_1 ^= ROR16(t0_2, 4); \ + x3_2 ^= 
ROR16(t0_3, 4); \ t0_0 = x4_0; \ t0_1 = x4_1; \ t0_2 = x4_2; \ t0_3 = x4_3; \ - x4_1 ^= ROTR16(t0_0, 2); \ - x4_2 ^= ROTR16(t0_1, 2); \ - x4_3 ^= ROTR16(t0_2, 2); \ - x4_0 ^= ROTR16(t0_3, 1); \ - x4_3 ^= ROTL16(t0_0, 5); \ - x4_0 ^= ROTL16(t0_1, 6); \ - x4_1 ^= ROTL16(t0_2, 6); \ - x4_2 ^= ROTL16(t0_3, 6); \ + x4_1 ^= ROR16(t0_0, 2); \ + x4_2 ^= ROR16(t0_1, 2); \ + x4_3 ^= ROR16(t0_2, 2); \ + x4_0 ^= ROR16(t0_3, 1); \ + x4_3 ^= ROL16(t0_0, 5); \ + x4_0 ^= ROL16(t0_1, 6); \ + x4_1 ^= ROL16(t0_2, 6); \ + x4_2 ^= ROL16(t0_3, 6); \ } while (0) #define P12_16 \ @@ -252,28 +245,27 @@ typedef unsigned long long u64; ROUND_16(1, 2, 1, 1); \ } while (0) -int crypto_hash(unsigned char *out, const unsigned char *in, - unsigned long long inlen) { - u64 rlen; - u64 i; +int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t inlen) { + uint64_t rlen; + uint64_t i; - u8 buffer[8]; + uint8_t buffer[8]; - u16 x0_0, x1_0, x2_0, x3_0, x4_0; - u16 t0_0, t1_0; + uint16_t x0_0, x1_0, x2_0, x3_0, x4_0; + uint16_t t0_0, t1_0; - u16 x0_1, x1_1, x2_1, x3_1, x4_1; - u16 t0_1, t1_1; + uint16_t x0_1, x1_1, x2_1, x3_1, x4_1; + uint16_t t0_1, t1_1; - u16 x0_2, x1_2, x2_2, x3_2, x4_2; - u16 t0_2, t1_2; + uint16_t x0_2, x1_2, x2_2, x3_2, x4_2; + uint16_t t0_2, t1_2; - u16 x0_3, x1_3, x2_3, x3_3, x4_3; - u16 t0_3, t1_3; + uint16_t x0_3, x1_3, x2_3, x3_3, x4_3; + uint16_t t0_3, t1_3; - u16 in_0, in_1, in_2, in_3; + uint16_t in_0, in_1, in_2, in_3; - // initialization + /* initialization */ x0_0 = 0x38db; x0_1 = 0xd37a; x0_2 = 0xc0b9; @@ -295,17 +287,17 @@ int crypto_hash(unsigned char *out, const unsigned char *in, x4_2 = 0x56da; x4_3 = 0x3b88; - // absorb + /* absorb */ rlen = inlen; - while (rlen >= RATE) { + while (rlen >= ASCON_RATE) { COMPRESS_BYTE_ARRAY_16(in, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; x0_2 ^= in_2; x0_3 ^= in_3; P12_16; - rlen -= RATE; - in += RATE; + rlen -= ASCON_RATE; + in += ASCON_RATE; } for (i = 0; i < rlen; ++i, ++in) buffer[i] = *in; buffer[rlen] = 0x80; 
@@ -317,16 +309,15 @@ int crypto_hash(unsigned char *out, const unsigned char *in, x0_3 ^= in_3; P12_16; - // squeeze (only full blocks) + /* squeeze (only full blocks) */ rlen = CRYPTO_BYTES; - while (rlen > RATE) { + while (rlen > ASCON_RATE) { EXPAND_BYTE_ARRAY_16(out, x0_3, x0_2, x0_1, x0_0); P12_16; - rlen -= RATE; - out += RATE; + rlen -= ASCON_RATE; + out += ASCON_RATE; } EXPAND_BYTE_ARRAY_16(out, x0_3, x0_2, x0_1, x0_0); return 0; } - diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32/api.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32/api.h index ae8c7f6..ad0325e 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32/api.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32/api.h @@ -1 +1,3 @@ #define CRYPTO_BYTES 32 +#define ASCON_RATE 8 +#define ASCON_HASH diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32/ascon.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32/ascon.h new file mode 100644 index 0000000..10a5b6e --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32/config.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32/config.h new file mode 100644 index 0000000..b1b5080 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define 
ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 0 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'M' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32/endian.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32/endian.h index ffbdcd9..3944360 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32/endian.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32/endian.h @@ -3,29 +3,37 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) #elif defined(_MSC_VER) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ - (((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ - (((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ - (((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + 
(((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) #define U32BIG(x) \ - ((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ - (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) -#define U16BIG(x) \ - ((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8)) + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ - +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32/hash.c b/ascon/Implementations/crypto_hash/asconhashv12/bi32/hash.c index b09a406..34dec63 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32/hash.c +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32/hash.c @@ -1,59 +1,51 @@ #include "api.h" -#include "endian.h" +#include "ascon.h" +#include "loadstore.h" #include "permutations.h" - -#define RATE (64 / 8) -#define PA_ROUNDS 12 - -int crypto_hash(unsigned char* out, const unsigned char* in, - unsigned long long inlen) { - u32_2 x0, x1, x2, x3, x4; - u32_2 t0, t1, t2, t3, t4; - u64 outlen, tmp0; - u32 i; - - // initialization - x0.e = 0xa540dbc7; - x0.o = 0xf9afb5c6; - x1.e = 0x1445a340; - x1.o = 0xbd249301; - x2.e = 0x604d4fc8; - x2.o = 0xcb9ba8b5; - x3.e = 0x94514c98; - x3.o = 0x12a4eede; - x4.e = 0x6339f398; - x4.o = 0x4bca84c0; - - // absorb plaintext - while (inlen >= RATE) { - to_bit_interleaving(t0, U64BIG(*(u64*)in)); - x0.e ^= t0.e; - x0.o ^= t0.o; - P12(); - inlen -= RATE; - in += RATE; +#include "printstate.h" + +int 
crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { + state_t s; + + /* initialization */ +#ifdef ASCON_HASH + s.x0 = ASCON_HASH_IV0; + s.x1 = ASCON_HASH_IV1; + s.x2 = ASCON_HASH_IV2; + s.x3 = ASCON_HASH_IV3; + s.x4 = ASCON_HASH_IV4; +#endif +#ifdef ASCON_XOF + s.x0 = ASCON_XOF_IV0; + s.x1 = ASCON_XOF_IV1; + s.x2 = ASCON_XOF_IV2; + s.x3 = ASCON_XOF_IV3; + s.x4 = ASCON_XOF_IV4; +#endif + printstate("initialization", &s); + + /* absorb plaintext */ + while (len >= ASCON_RATE) { + XOR(s.x0, LOAD64(in)); + P12(&s); + in += ASCON_RATE; + len -= ASCON_RATE; } - tmp0 = 0; - for (i = 0; i < inlen; ++i, ++in) tmp0 |= INS_BYTE64(*in, i); - tmp0 |= INS_BYTE64(0x80, inlen); - to_bit_interleaving(t0, tmp0); - x0.e ^= t0.e; - x0.o ^= t0.o; - - P12(); - - // squeeze output - outlen = CRYPTO_BYTES; - while (outlen > RATE) { - from_bit_interleaving(tmp0, x0); - *(u64*)out = U64BIG(tmp0); - P12(); - outlen -= RATE; - out += RATE; + if (len) XOR(s.x0, LOAD(in, len)); + XOR(s.x0, PAD(len)); + P12(&s); + printstate("absorb plaintext", &s); + + /* squeeze output */ + len = CRYPTO_BYTES; + while (len > ASCON_RATE) { + STORE64(out, s.x0); + P12(&s); + out += ASCON_RATE; + len -= ASCON_RATE; } - from_bit_interleaving(tmp0, x0); - *(u64*)out = U64BIG(tmp0); + STORE64(out, s.x0); + printstate("squeeze output", &s); return 0; } - diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32/loadstore.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) 
((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = 
U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32/permutations.c b/ascon/Implementations/crypto_hash/asconhashv12/bi32/permutations.c new file mode 100644 index 0000000..1bca2ef --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32/permutations.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32/permutations.h index 8b13e99..ef338f1 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32/permutations.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32/permutations.h @@ -1,128 +1,189 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -#include "endian.h" - -typedef unsigned char 
u8; -typedef unsigned int u32; -typedef unsigned long long u64; - -typedef struct { - u32 e; - u32 o; -} u32_2; - -#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) -#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) - -// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -#define to_bit_interleaving(out, in) \ - do { \ - u32 hi = (in) >> 32; \ - u32 lo = (u32)(in); \ - u32 r0, r1; \ - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \ - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \ - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \ - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \ - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \ - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \ - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \ - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \ - (out).e = (lo & 0x0000FFFF) | (hi << 16); \ - (out).o = (lo >> 16) | (hi & 0xFFFF0000); \ - } while (0) - -// Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 -#define from_bit_interleaving(out, in) \ - do { \ - u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16); \ - u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000); \ - u32 r0, r1; \ - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \ - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \ - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \ - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \ - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \ - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \ - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \ - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \ - out = (u64)hi << 32 | lo; \ - } while (0) - -#define ROUND(C_e, C_o) \ - do { \ - /* round constant */ \ - x2.e ^= C_e; x2.o ^= C_o; \ - /* s-box layer */ \ - x0.e ^= x4.e; x0.o ^= x4.o; \ - x4.e ^= x3.e; x4.o ^= x3.o; \ - x2.e ^= x1.e; x2.o ^= x1.o; \ - t0.e = x0.e; t0.o = x0.o; \ - t4.e = x4.e; t4.o = x4.o; \ - t3.e = x3.e; t3.o = x3.o; \ - t1.e = x1.e; t1.o = x1.o; \ - t2.e = x2.e; t2.o = x2.o; \ - x0.e = t0.e ^ (~t1.e & t2.e); x0.o = t0.o ^ (~t1.o & t2.o); \ - x2.e = t2.e ^ (~t3.e & t4.e); x2.o = t2.o ^ (~t3.o & t4.o); \ - x4.e = t4.e ^ (~t0.e & t1.e); x4.o = t4.o ^ (~t0.o & t1.o); \ - x1.e = t1.e ^ (~t2.e & t3.e); x1.o = t1.o ^ (~t2.o & t3.o); \ - x3.e = t3.e ^ (~t4.e & t0.e); x3.o = t3.o ^ (~t4.o & t0.o); \ - x1.e ^= x0.e; x1.o ^= x0.o; \ - x3.e ^= x2.e; x3.o ^= x2.o; \ - x0.e ^= x4.e; x0.o ^= x4.o; \ - /* linear layer */ \ - t0.e = x0.e ^ ROTR32(x0.o, 4); t0.o = x0.o ^ ROTR32(x0.e, 5); \ - t1.e = x1.e ^ ROTR32(x1.e, 11); t1.o = x1.o ^ ROTR32(x1.o, 11); \ - t2.e = x2.e ^ ROTR32(x2.o, 2); t2.o = x2.o ^ ROTR32(x2.e, 3); \ - t3.e = x3.e ^ ROTR32(x3.o, 3); t3.o = x3.o ^ ROTR32(x3.e, 4); \ - t4.e = x4.e ^ ROTR32(x4.e, 17); t4.o = x4.o ^ ROTR32(x4.o, 17); \ - x0.e ^= ROTR32(t0.o, 9); x0.o ^= ROTR32(t0.e, 10); \ - x1.e ^= ROTR32(t1.o, 19); x1.o ^= ROTR32(t1.e, 
20); \ - x2.e ^= t2.o; x2.o ^= ROTR32(t2.e, 1); \ - x3.e ^= ROTR32(t3.e, 5); x3.o ^= ROTR32(t3.o, 5); \ - x4.e ^= ROTR32(t4.o, 3); x4.o ^= ROTR32(t4.e, 4); \ - x2.e = ~x2.e; x2.o = ~x2.o; \ - } while(0) - -#define P12() \ - do { \ - ROUND(0xc, 0xc); \ - ROUND(0x9, 0xc); \ - ROUND(0xc, 0x9); \ - ROUND(0x9, 0x9); \ - ROUND(0x6, 0xc); \ - ROUND(0x3, 0xc); \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#define P8() \ - do { \ - ROUND(0x6, 0xc); \ - ROUND(0x3, 0xc); \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#define P6() \ - do { \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#endif // PERMUTATIONS_H_ +#include +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + 
U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); 
+} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32/printstate.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + 
printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32/round.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32/round.h new file mode 100644 index 0000000..d8ea3b6 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32/round.h @@ -0,0 +1,85 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + state_t t; + /* round constant */ + s->x2.e ^= C_e; + s->x2.o ^= C_o; + /* s-box layer */ + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + s->x4.e ^= s->x3.e; + s->x4.o ^= s->x3.o; + s->x2.e ^= s->x1.e; + s->x2.o ^= s->x1.o; + t.x0.e = s->x0.e; + t.x0.o = s->x0.o; + t.x4.e = s->x4.e; + t.x4.o = s->x4.o; + t.x3.e = s->x3.e; + t.x3.o = s->x3.o; + t.x1.e = s->x1.e; + t.x1.o = s->x1.o; + t.x2.e = s->x2.e; + t.x2.o = s->x2.o; + s->x0.e = t.x0.e ^ (~t.x1.e & t.x2.e); + s->x0.o = t.x0.o ^ (~t.x1.o & t.x2.o); + s->x2.e = t.x2.e ^ (~t.x3.e & t.x4.e); + s->x2.o = t.x2.o ^ (~t.x3.o & t.x4.o); + s->x4.e = t.x4.e ^ (~t.x0.e & t.x1.e); + s->x4.o = t.x4.o ^ (~t.x0.o & t.x1.o); + s->x1.e = t.x1.e ^ (~t.x2.e & t.x3.e); + s->x1.o = t.x1.o ^ (~t.x2.o & t.x3.o); + s->x3.e = t.x3.e ^ (~t.x4.e & t.x0.e); + s->x3.o = t.x3.o ^ (~t.x4.o & t.x0.o); + s->x1.e ^= s->x0.e; + s->x1.o ^= s->x0.o; + s->x3.e ^= s->x2.e; + s->x3.o ^= s->x2.o; + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + /* linear layer */ + t.x0.e = s->x0.e ^ ROR32(s->x0.o, 4); + t.x0.o = s->x0.o ^ ROR32(s->x0.e, 5); + t.x1.e = s->x1.e ^ 
ROR32(s->x1.e, 11); + t.x1.o = s->x1.o ^ ROR32(s->x1.o, 11); + t.x2.e = s->x2.e ^ ROR32(s->x2.o, 2); + t.x2.o = s->x2.o ^ ROR32(s->x2.e, 3); + t.x3.e = s->x3.e ^ ROR32(s->x3.o, 3); + t.x3.o = s->x3.o ^ ROR32(s->x3.e, 4); + t.x4.e = s->x4.e ^ ROR32(s->x4.e, 17); + t.x4.o = s->x4.o ^ ROR32(s->x4.o, 17); + s->x0.e ^= ROR32(t.x0.o, 9); + s->x0.o ^= ROR32(t.x0.e, 10); + s->x1.e ^= ROR32(t.x1.o, 19); + s->x1.o ^= ROR32(t.x1.e, 20); + s->x2.e ^= t.x2.o; + s->x2.o ^= ROR32(t.x2.e, 1); + s->x3.e ^= ROR32(t.x3.e, 5); + s->x3.o ^= ROR32(t.x3.o, 5); + s->x4.e ^= ROR32(t.x4.o, 3); + s->x4.o ^= ROR32(t.x4.e, 4); + s->x2.e = ~s->x2.e; + s->x2.o = ~s->x2.o; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32/word.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32/word.h new file mode 100644 index 0000000..8ffcaaa --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32/word.h @@ -0,0 +1,117 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef struct { + uint32_t e; + uint32_t o; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { + return (word_t){.o = x >> 32, .e = x}; +} + +__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } + +__forceinline uint64_t TOBI32(uint64_t in); + +__forceinline uint64_t FROMBI32(uint64_t in); + +__forceinline word_t U64TOWORD(uint64_t x) { + uint64_t w = TOBI32(x); + return (word_t){.o = w >> 32, .e = w}; +} + +__forceinline uint64_t WORDTOU64(word_t w) { + return FROMBI32((uint64_t)w.o << 32 | w.e); +} + +#define XOR(a, b) \ + do { \ + word_t tb = b; \ + (a).e ^= tb.e; \ + (a).o ^= tb.o; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t tb = b; \ + (a).e &= tb.e; \ + (a).o &= tb.o; \ + } while (0) + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? 
ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); +} + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return WORD_T((uint64_t)mask << 32 | mask); +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t TOBI32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t FROMBI32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + return (uint64_t)hi << 32 | lo; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/api.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/api.h index ae8c7f6..ad0325e 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/api.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/api.h @@ -1 +1,3 @@ #define CRYPTO_BYTES 32 +#define ASCON_RATE 8 +#define ASCON_HASH diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/ascon.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/ascon.h new file mode 100644 index 0000000..10a5b6e --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git 
a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/config.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/config.h new file mode 100644 index 0000000..4242e2e --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'H' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/endian.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/endian.h index ffbdcd9..3944360 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/endian.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/endian.h @@ -3,29 +3,37 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) #elif defined(_MSC_VER) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ - (((x) & 
0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ - (((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ - (((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) #define U32BIG(x) \ - ((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ - (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) -#define U16BIG(x) \ - ((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8)) + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ - +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/hash.c b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/hash.c index 19da8d5..34dec63 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/hash.c +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/hash.c @@ -1,59 +1,51 @@ #include "api.h" -#include "endian.h" +#include "ascon.h" +#include "loadstore.h" #include "permutations.h" - -#define RATE (64 / 8) -#define PA_ROUNDS 12 - -int crypto_hash(unsigned char* out, const unsigned char* in, - unsigned long long inlen) { - u32_2 x0, x1, x2, x3, x4; - u32_2 t0; - u64 outlen, tmp0; - u32 i; - - // initialization - x0.e = 0xa540dbc7; - x0.o 
= 0xf9afb5c6; - x1.e = 0x1445a340; - x1.o = 0xbd249301; - x2.e = 0x604d4fc8; - x2.o = 0xcb9ba8b5; - x3.e = 0x94514c98; - x3.o = 0x12a4eede; - x4.e = 0x6339f398; - x4.o = 0x4bca84c0; - - // absorb plaintext - while (inlen >= RATE) { - to_bit_interleaving(t0, U64BIG(*(u64*)in)); - x0.e ^= t0.e; - x0.o ^= t0.o; - P12(); - inlen -= RATE; - in += RATE; +#include "printstate.h" + +int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { + state_t s; + + /* initialization */ +#ifdef ASCON_HASH + s.x0 = ASCON_HASH_IV0; + s.x1 = ASCON_HASH_IV1; + s.x2 = ASCON_HASH_IV2; + s.x3 = ASCON_HASH_IV3; + s.x4 = ASCON_HASH_IV4; +#endif +#ifdef ASCON_XOF + s.x0 = ASCON_XOF_IV0; + s.x1 = ASCON_XOF_IV1; + s.x2 = ASCON_XOF_IV2; + s.x3 = ASCON_XOF_IV3; + s.x4 = ASCON_XOF_IV4; +#endif + printstate("initialization", &s); + + /* absorb plaintext */ + while (len >= ASCON_RATE) { + XOR(s.x0, LOAD64(in)); + P12(&s); + in += ASCON_RATE; + len -= ASCON_RATE; } - tmp0 = 0; - for (i = 0; i < inlen; ++i, ++in) tmp0 |= INS_BYTE64(*in, i); - tmp0 |= INS_BYTE64(0x80, inlen); - to_bit_interleaving(t0, tmp0); - x0.e ^= t0.e; - x0.o ^= t0.o; - - P12(); - - // squeeze output - outlen = CRYPTO_BYTES; - while (outlen > RATE) { - from_bit_interleaving(tmp0, x0); - *(u64*)out = U64BIG(tmp0); - P12(); - outlen -= RATE; - out += RATE; + if (len) XOR(s.x0, LOAD(in, len)); + XOR(s.x0, PAD(len)); + P12(&s); + printstate("absorb plaintext", &s); + + /* squeeze output */ + len = CRYPTO_BYTES; + while (len > ASCON_RATE) { + STORE64(out, s.x0); + P12(&s); + out += ASCON_RATE; + len -= ASCON_RATE; } - from_bit_interleaving(tmp0, x0); - *(u64*)out = U64BIG(tmp0); + STORE64(out, s.x0); + printstate("squeeze output", &s); return 0; } - diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/loadstore.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/loadstore.h 
@@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t 
w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/permutations.c b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/permutations.c new file mode 100644 index 0000000..1bca2ef --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git 
a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/permutations.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/permutations.h index 08b8d75..ef338f1 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/permutations.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/permutations.h @@ -1,157 +1,189 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -#include "endian.h" - -typedef unsigned char u8; -typedef unsigned int u32; -typedef unsigned long long u64; - -typedef struct { - u32 e; - u32 o; -} u32_2; - -#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) - -// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -#define to_bit_interleaving(out, in) \ - do { \ - u32 hi = (in) >> 32; \ - u32 lo = (u32)(in); \ - u32 r0, r1; \ - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \ - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \ - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \ - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \ - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \ - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \ - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \ - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \ - (out).e = (lo & 0x0000FFFF) | (hi << 16); \ - (out).o = (lo >> 16) | (hi & 0xFFFF0000); \ - } while (0) - -// Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 -#define from_bit_interleaving(out, in) \ - do { \ - u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16); \ - u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000); \ - u32 r0, r1; \ - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \ - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \ - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \ - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \ - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \ - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \ - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \ - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \ - out = (u64)hi << 32 | lo; \ - } while (0) - -#define ROUND(C_e, C_o) \ - do { \ - u32 reg0, reg1, reg2, reg3; \ - __asm__ __volatile__ ( \ - "eor %[x2_e], %[x2_e], #" #C_e "\n\t" \ - "eor %[x2_o], %[x2_o], #" #C_o "\n\t" \ - "eor %[x0_e], %[x0_e], %[x4_e]\n\t" \ - "eor %[x0_o], %[x0_o], %[x4_o]\n\t" \ - "eor %[x4_e], %[x4_e], %[x3_e]\n\t" \ - "eor %[x4_o], %[x4_o], %[x3_o]\n\t" \ - "eor %[x2_e], %[x2_e], %[x1_e]\n\t" \ - "eor %[x2_o], %[x2_o], %[x1_o]\n\t" \ - "bic %[reg0], %[x0_e], %[x4_e]\n\t" \ - "bic %[reg1], %[x4_e], %[x3_e]\n\t" \ - "bic %[reg2], %[x2_e], %[x1_e]\n\t" \ - "bic %[reg3], %[x1_e], %[x0_e]\n\t" \ - "eor %[x2_e], %[x2_e], %[reg1]\n\t" \ - "eor %[x0_e], %[x0_e], %[reg2]\n\t" \ - "eor %[x4_e], %[x4_e], %[reg3]\n\t" \ - "bic %[reg3], %[x3_e], %[x2_e]\n\t" \ - "eor %[x3_e], %[x3_e], %[reg0]\n\t" \ - "bic %[reg2], %[x0_o], %[x4_o]\n\t" \ - "bic %[reg0], %[x2_o], %[x1_o]\n\t" \ - "bic %[reg1], %[x4_o], %[x3_o]\n\t" \ - "eor %[x1_e], %[x1_e], %[reg3]\n\t" \ - "eor %[x0_o], %[x0_o], %[reg0]\n\t" \ - "eor %[x2_o], %[x2_o], %[reg1]\n\t" \ - "bic %[reg3], %[x1_o], %[x0_o]\n\t" \ - "bic %[reg0], %[x3_o], %[x2_o]\n\t" \ - "eor %[x3_o], %[x3_o], %[reg2]\n\t" \ - "eor %[x3_o], %[x3_o], %[x2_o]\n\t" \ - "eor %[x4_o], %[x4_o], %[reg3]\n\t" \ - "eor %[x1_o], 
%[x1_o], %[reg0]\n\t" \ - "eor %[x3_e], %[x3_e], %[x2_e]\n\t" \ - "eor %[x1_e], %[x1_e], %[x0_e]\n\t" \ - "eor %[x1_o], %[x1_o], %[x0_o]\n\t" \ - "eor %[x0_e], %[x0_e], %[x4_e]\n\t" \ - "eor %[x0_o], %[x0_o], %[x4_o]\n\t" \ - "mvn %[x2_e], %[x2_e]\n\t" \ - "mvn %[x2_o], %[x2_o]\n\t" \ - "eor %[reg0], %[x0_e], %[x0_o], ror #4\n\t" \ - "eor %[reg1], %[x0_o], %[x0_e], ror #5\n\t" \ - "eor %[reg2], %[x1_e], %[x1_e], ror #11\n\t" \ - "eor %[reg3], %[x1_o], %[x1_o], ror #11\n\t" \ - "eor %[x0_e], %[x0_e], %[reg1], ror #9\n\t" \ - "eor %[x0_o], %[x0_o], %[reg0], ror #10\n\t" \ - "eor %[x1_e], %[x1_e], %[reg3], ror #19\n\t" \ - "eor %[x1_o], %[x1_o], %[reg2], ror #20\n\t" \ - "eor %[reg0], %[x2_e], %[x2_o], ror #2\n\t" \ - "eor %[reg1], %[x2_o], %[x2_e], ror #3\n\t" \ - "eor %[reg2], %[x3_e], %[x3_o], ror #3\n\t" \ - "eor %[reg3], %[x3_o], %[x3_e], ror #4\n\t" \ - "eor %[x2_e], %[x2_e], %[reg1]\n\t" \ - "eor %[x2_o], %[x2_o], %[reg0], ror #1\n\t" \ - "eor %[x3_e], %[x3_e], %[reg2], ror #5\n\t" \ - "eor %[x3_o], %[x3_o], %[reg3], ror #5\n\t" \ - "eor %[reg0], %[x4_e], %[x4_e], ror #17\n\t" \ - "eor %[reg1], %[x4_o], %[x4_o], ror #17\n\t" \ - "eor %[x4_e], %[x4_e], %[reg1], ror #3\n\t" \ - "eor %[x4_o], %[x4_o], %[reg0], ror #4\n\t" \ - : [x0_e] "+r" (x0.e), [x1_e] "+r" (x1.e), [x2_e] "+r" (x2.e), [x3_e] "+r" (x3.e), [x4_e] "+r" (x4.e), \ - [x0_o] "+r" (x0.o), [x1_o] "+r" (x1.o), [x2_o] "+r" (x2.o), [x3_o] "+r" (x3.o), [x4_o] "+r" (x4.o), \ - [reg0] "=r" (reg0), [reg1] "=r" (reg1), [reg2] "=r" (reg2), [reg3] "=r" (reg3)::); \ - } while (0) - -#define P12() \ - do { \ - ROUND(0xc, 0xc); \ - ROUND(0x9, 0xc); \ - ROUND(0xc, 0x9); \ - ROUND(0x9, 0x9); \ - ROUND(0x6, 0xc); \ - ROUND(0x3, 0xc); \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#define P8() \ - do { \ - ROUND(0x6, 0xc); \ - ROUND(0x3, 0xc); \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 
0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#define P6() \ - do { \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#endif // PERMUTATIONS_H_ +#include +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) 
+#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" 
permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/printstate.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/round.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/round.h new file mode 100644 index 0000000..06da1ca --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/round.h @@ -0,0 +1,102 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = 
WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + uint32_t tmp0, tmp1, tmp2, tmp3; + /* clang-format off */ + __asm__ __volatile__( \ + "eor %[x2_e], %[x2_e], %[C_e]\n\t" \ + "eor %[x2_o], %[x2_o], %[C_o]\n\t" \ + "eor %[x0_e], %[x0_e], %[x4_e]\n\t" \ + "eor %[x0_o], %[x0_o], %[x4_o]\n\t" \ + "eor %[x4_e], %[x4_e], %[x3_e]\n\t" \ + "eor %[x4_o], %[x4_o], %[x3_o]\n\t" \ + "eor %[x2_e], %[x2_e], %[x1_e]\n\t" \ + "eor %[x2_o], %[x2_o], %[x1_o]\n\t" \ + "bic %[tmp0], %[x0_e], %[x4_e]\n\t" \ + "bic %[tmp1], %[x4_e], %[x3_e]\n\t" \ + "bic %[tmp2], %[x2_e], %[x1_e]\n\t" \ + "bic %[tmp3], %[x1_e], %[x0_e]\n\t" \ + "eor %[x2_e], %[x2_e], %[tmp1]\n\t" \ + "eor %[x0_e], %[x0_e], %[tmp2]\n\t" \ + "eor %[x4_e], %[x4_e], %[tmp3]\n\t" \ + "bic %[tmp3], %[x3_e], %[x2_e]\n\t" \ + "eor %[x3_e], %[x3_e], %[tmp0]\n\t" \ + "bic %[tmp2], %[x0_o], %[x4_o]\n\t" \ + "bic %[tmp0], %[x2_o], %[x1_o]\n\t" \ + "bic %[tmp1], %[x4_o], %[x3_o]\n\t" \ + "eor %[x1_e], %[x1_e], %[tmp3]\n\t" \ + "eor %[x0_o], %[x0_o], %[tmp0]\n\t" \ + "eor %[x2_o], %[x2_o], %[tmp1]\n\t" \ + "bic %[tmp3], %[x1_o], %[x0_o]\n\t" \ + "bic %[tmp0], %[x3_o], %[x2_o]\n\t" \ + "eor %[x3_o], %[x3_o], %[tmp2]\n\t" \ + "eor %[x3_o], %[x3_o], %[x2_o]\n\t" \ + "eor %[x4_o], %[x4_o], %[tmp3]\n\t" \ + "eor %[x1_o], %[x1_o], %[tmp0]\n\t" \ + "eor %[x3_e], %[x3_e], %[x2_e]\n\t" \ + "eor %[x1_e], %[x1_e], %[x0_e]\n\t" \ + "eor %[x1_o], %[x1_o], %[x0_o]\n\t" \ + "eor %[x0_e], %[x0_e], %[x4_e]\n\t" \ + "eor %[x0_o], %[x0_o], %[x4_o]\n\t" \ + "mvn %[x2_e], %[x2_e]\n\t" \ + "mvn %[x2_o], %[x2_o]\n\t" \ + "eor %[tmp0], %[x0_e], %[x0_o], ror #4\n\t" \ + "eor %[tmp1], %[x0_o], %[x0_e], ror #5\n\t" \ + "eor %[tmp2], %[x1_e], %[x1_e], ror #11\n\t" \ + "eor %[tmp3], %[x1_o], %[x1_o], ror #11\n\t" \ + "eor %[x0_e], %[x0_e], 
%[tmp1], ror #9\n\t" \ + "eor %[x0_o], %[x0_o], %[tmp0], ror #10\n\t" \ + "eor %[x1_e], %[x1_e], %[tmp3], ror #19\n\t" \ + "eor %[x1_o], %[x1_o], %[tmp2], ror #20\n\t" \ + "eor %[tmp0], %[x2_e], %[x2_o], ror #2\n\t" \ + "eor %[tmp1], %[x2_o], %[x2_e], ror #3\n\t" \ + "eor %[tmp2], %[x3_e], %[x3_o], ror #3\n\t" \ + "eor %[tmp3], %[x3_o], %[x3_e], ror #4\n\t" \ + "eor %[x2_e], %[x2_e], %[tmp1]\n\t" \ + "eor %[x2_o], %[x2_o], %[tmp0], ror #1\n\t" \ + "eor %[x3_e], %[x3_e], %[tmp2], ror #5\n\t" \ + "eor %[x3_o], %[x3_o], %[tmp3], ror #5\n\t" \ + "eor %[tmp0], %[x4_e], %[x4_e], ror #17\n\t" \ + "eor %[tmp1], %[x4_o], %[x4_o], ror #17\n\t" \ + "eor %[x4_e], %[x4_e], %[tmp1], ror #3\n\t" \ + "eor %[x4_o], %[x4_o], %[tmp0], ror #4\n\t" \ + : [ x0_e ] "+r"(s->x0.e), \ + [ x1_e ] "+r"(s->x1.e), \ + [ x2_e ] "+r"(s->x2.e), \ + [ x3_e ] "+r"(s->x3.e), \ + [ x4_e ] "+r"(s->x4.e), \ + [ x0_o ] "+r"(s->x0.o), \ + [ x1_o ] "+r"(s->x1.o), \ + [ x2_o ] "+r"(s->x2.o), \ + [ x3_o ] "+r"(s->x3.o), \ + [ x4_o ] "+r"(s->x4.o), \ + [ tmp0 ] "=r"(tmp0), \ + [ tmp1 ] "=r"(tmp1), \ + [ tmp2 ] "=r"(tmp2), \ + [ tmp3 ] "=r"(tmp3) \ + : [ C_e ] "i"(C_e), \ + [ C_o ] "i"(C_o) \ + : ); + /* clang-format on */ + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/word.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/word.h new file mode 100644 index 0000000..8ffcaaa --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_arm/word.h @@ -0,0 +1,117 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef struct { + uint32_t e; + uint32_t o; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { + return (word_t){.o = x >> 32, .e = x}; +} + +__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } + +__forceinline uint64_t TOBI32(uint64_t in); + +__forceinline uint64_t FROMBI32(uint64_t in); + +__forceinline word_t U64TOWORD(uint64_t x) { + uint64_t 
w = TOBI32(x); + return (word_t){.o = w >> 32, .e = w}; +} + +__forceinline uint64_t WORDTOU64(word_t w) { + return FROMBI32((uint64_t)w.o << 32 | w.e); +} + +#define XOR(a, b) \ + do { \ + word_t tb = b; \ + (a).e ^= tb.e; \ + (a).o ^= tb.o; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t tb = b; \ + (a).e &= tb.e; \ + (a).o &= tb.o; \ + } while (0) + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); +} + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return WORD_T((uint64_t)mask << 32 | mask); +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t TOBI32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t FROMBI32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + return (uint64_t)hi << 32 | lo; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/api.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/api.h index ae8c7f6..ad0325e 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/api.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/api.h @@ -1 +1,3 @@ #define CRYPTO_BYTES 32 +#define ASCON_RATE 8 +#define ASCON_HASH diff --git 
a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/ascon.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/ascon.h new file mode 100644 index 0000000..10a5b6e --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/config.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/config.h new file mode 100644 index 0000000..5ccce77 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'M' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/endian.h 
b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/endian.h index ffbdcd9..3944360 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/endian.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/endian.h @@ -3,29 +3,37 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) #elif defined(_MSC_VER) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ - (((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ - (((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ - (((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) #define U32BIG(x) \ - ((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ - (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) -#define U16BIG(x) \ - ((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8)) + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 
& (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ - +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/hash.c b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/hash.c index 19da8d5..34dec63 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/hash.c +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/hash.c @@ -1,59 +1,51 @@ #include "api.h" -#include "endian.h" +#include "ascon.h" +#include "loadstore.h" #include "permutations.h" - -#define RATE (64 / 8) -#define PA_ROUNDS 12 - -int crypto_hash(unsigned char* out, const unsigned char* in, - unsigned long long inlen) { - u32_2 x0, x1, x2, x3, x4; - u32_2 t0; - u64 outlen, tmp0; - u32 i; - - // initialization - x0.e = 0xa540dbc7; - x0.o = 0xf9afb5c6; - x1.e = 0x1445a340; - x1.o = 0xbd249301; - x2.e = 0x604d4fc8; - x2.o = 0xcb9ba8b5; - x3.e = 0x94514c98; - x3.o = 0x12a4eede; - x4.e = 0x6339f398; - x4.o = 0x4bca84c0; - - // absorb plaintext - while (inlen >= RATE) { - to_bit_interleaving(t0, U64BIG(*(u64*)in)); - x0.e ^= t0.e; - x0.o ^= t0.o; - P12(); - inlen -= RATE; - in += RATE; +#include "printstate.h" + +int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { + state_t s; + + /* initialization */ +#ifdef ASCON_HASH + s.x0 = ASCON_HASH_IV0; + s.x1 = ASCON_HASH_IV1; + s.x2 = ASCON_HASH_IV2; + s.x3 = ASCON_HASH_IV3; + s.x4 = ASCON_HASH_IV4; +#endif +#ifdef ASCON_XOF + s.x0 = ASCON_XOF_IV0; + s.x1 = ASCON_XOF_IV1; + s.x2 = ASCON_XOF_IV2; + s.x3 = ASCON_XOF_IV3; + s.x4 = ASCON_XOF_IV4; +#endif + printstate("initialization", &s); + + /* absorb plaintext */ + while (len >= ASCON_RATE) { + XOR(s.x0, LOAD64(in)); + P12(&s); + in += ASCON_RATE; + len -= ASCON_RATE; } - tmp0 = 0; - for (i = 0; i < inlen; ++i, ++in) tmp0 |= INS_BYTE64(*in, i); - tmp0 |= INS_BYTE64(0x80, inlen); - to_bit_interleaving(t0, tmp0); - x0.e 
^= t0.e; - x0.o ^= t0.o; - - P12(); - - // squeeze output - outlen = CRYPTO_BYTES; - while (outlen > RATE) { - from_bit_interleaving(tmp0, x0); - *(u64*)out = U64BIG(tmp0); - P12(); - outlen -= RATE; - out += RATE; + if (len) XOR(s.x0, LOAD(in, len)); + XOR(s.x0, PAD(len)); + P12(&s); + printstate("absorb plaintext", &s); + + /* squeeze output */ + len = CRYPTO_BYTES; + while (len > ASCON_RATE) { + STORE64(out, s.x0); + P12(&s); + out += ASCON_RATE; + len -= ASCON_RATE; } - from_bit_interleaving(tmp0, x0); - *(u64*)out = U64BIG(tmp0); + STORE64(out, s.x0); + printstate("squeeze output", &s); return 0; } - diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/loadstore.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + 
+#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/permutations.c b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/permutations.c new file mode 100644 index 0000000..1bca2ef --- /dev/null +++ 
b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/permutations.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/permutations.h index 10ae468..ef338f1 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/permutations.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/permutations.h @@ -1,134 +1,189 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -#include "endian.h" - -typedef unsigned char u8; -typedef unsigned int u32; -typedef unsigned long long u64; - -typedef struct { - u32 e; - u32 o; -} u32_2; - -#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) -#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) - -// Credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 -#define to_bit_interleaving(out, in) \ - do { \ - u32 hi = (in) >> 32; \ - u32 lo = (u32)(in); \ - u32 r0, r1; \ - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \ - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \ - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \ - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \ - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \ - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \ - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \ - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \ - (out).e = (lo & 0x0000FFFF) | (hi << 16); \ - (out).o = (lo >> 16) | (hi & 0xFFFF0000); \ - } while (0) - -// Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -#define from_bit_interleaving(out, in) \ - do { \ - u32 lo = ((in).e & 0x0000FFFF) | ((in).o << 16); \ - u32 hi = ((in).e >> 16) | ((in).o & 0xFFFF0000); \ - u32 r0, r1; \ - r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); \ - r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); \ - r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); \ - r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); \ - r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); \ - r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); \ - r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); \ - r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); \ - out = (u64)hi << 32 | lo; \ - } while (0) - -#define ROUND(C_e, C_o) \ - do { \ - /* round constant */ \ - x2.e ^= C_e; x2.o ^= C_o; \ - /* s-box layer */ \ - x0.e ^= x4.e; x0.o ^= x4.o; \ - x4.e ^= x3.e; x4.o ^= x3.o; \ - x2.e ^= x1.e; x2.o ^= x1.o; \ - t0.e = x0.e & (~x4.e); t0.o = x0.o & (~x4.o); \ - x0.e ^= x2.e & (~x1.e); x0.o ^= x2.o & (~x1.o); \ - x2.e ^= x4.e & (~x3.e); x2.o ^= x4.o & (~x3.o); \ - x4.e ^= x1.e & (~x0.e); x4.o ^= x1.o & (~x0.o); \ - x1.e ^= x3.e & (~x2.e); x1.o ^= 
x3.o & (~x2.o); \ - x3.e ^= t0.e; x3.o ^= t0.o; \ - x1.e ^= x0.e; x1.o ^= x0.o; \ - x3.e ^= x2.e; x3.o ^= x2.o; \ - x0.e ^= x4.e; x0.o ^= x4.o; \ - /* linear layer */ \ - t0.e = x0.e ^ ROTR32(x0.o, 4); \ - t0.o = x0.o ^ ROTR32(x0.e, 5); \ - x0.e ^= ROTR32(t0.o, 9); \ - x0.o ^= ROTR32(t0.e, 10); \ - t0.e = x1.e ^ ROTR32(x1.e, 11); \ - t0.o = x1.o ^ ROTR32(x1.o, 11); \ - x1.e ^= ROTR32(t0.o, 19); \ - x1.o ^= ROTR32(t0.e, 20); \ - t0.e = x2.e ^ ROTR32(x2.o, 2); \ - t0.o = x2.o ^ ROTR32(x2.e, 3); \ - x2.e ^= t0.o; \ - x2.o ^= ROTR32(t0.e, 1); \ - t0.e = x3.e ^ ROTR32(x3.o, 3); \ - t0.o = x3.o ^ ROTR32(x3.e, 4); \ - x3.e ^= ROTR32(t0.e, 5); \ - x3.o ^= ROTR32(t0.o, 5); \ - t0.e = x4.e ^ ROTR32(x4.e, 17); \ - t0.o = x4.o ^ ROTR32(x4.o, 17); \ - x4.e ^= ROTR32(t0.o, 3); \ - x4.o ^= ROTR32(t0.e, 4); \ - x2.e = ~x2.e; x2.o = ~x2.o; \ - } while(0) - -#define P12() \ - do { \ - ROUND(0xc, 0xc); \ - ROUND(0x9, 0xc); \ - ROUND(0xc, 0x9); \ - ROUND(0x9, 0x9); \ - ROUND(0x6, 0xc); \ - ROUND(0x3, 0xc); \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#define P8() \ - do { \ - ROUND(0x6, 0xc); \ - ROUND(0x3, 0xc); \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#define P6() \ - do { \ - ROUND(0x6, 0x9); \ - ROUND(0x3, 0x9); \ - ROUND(0xc, 0x6); \ - ROUND(0x9, 0x6); \ - ROUND(0xc, 0x3); \ - ROUND(0x9, 0x3); \ - } while (0) - -#endif // PERMUTATIONS_H_ +#include +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 
56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 
0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git 
a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/printstate.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/round.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/round.h new file mode 100644 index 0000000..bc7a0cd --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/round.h @@ -0,0 +1,77 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + uint32_t tmp_e, tmp_o; + /* round constant */ + s->x2.e ^= C_e; + s->x2.o ^= C_o; + /* s-box layer */ + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + s->x4.e ^= s->x3.e; + s->x4.o ^= s->x3.o; + s->x2.e ^= s->x1.e; + s->x2.o ^= s->x1.o; + tmp_e = s->x0.e & (~s->x4.e); + tmp_o = s->x0.o & (~s->x4.o); + s->x0.e ^= s->x2.e & (~s->x1.e); + s->x0.o ^= s->x2.o & (~s->x1.o); + s->x2.e ^= s->x4.e & 
(~s->x3.e); + s->x2.o ^= s->x4.o & (~s->x3.o); + s->x4.e ^= s->x1.e & (~s->x0.e); + s->x4.o ^= s->x1.o & (~s->x0.o); + s->x1.e ^= s->x3.e & (~s->x2.e); + s->x1.o ^= s->x3.o & (~s->x2.o); + s->x3.e ^= tmp_e; + s->x3.o ^= tmp_o; + s->x1.e ^= s->x0.e; + s->x1.o ^= s->x0.o; + s->x3.e ^= s->x2.e; + s->x3.o ^= s->x2.o; + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + /* linear layer */ + tmp_e = s->x0.e ^ ROR32(s->x0.o, 4); + tmp_o = s->x0.o ^ ROR32(s->x0.e, 5); + s->x0.e ^= ROR32(tmp_o, 9); + s->x0.o ^= ROR32(tmp_e, 10); + tmp_e = s->x1.e ^ ROR32(s->x1.e, 11); + tmp_o = s->x1.o ^ ROR32(s->x1.o, 11); + s->x1.e ^= ROR32(tmp_o, 19); + s->x1.o ^= ROR32(tmp_e, 20); + tmp_e = s->x2.e ^ ROR32(s->x2.o, 2); + tmp_o = s->x2.o ^ ROR32(s->x2.e, 3); + s->x2.e ^= tmp_o; + s->x2.o ^= ROR32(tmp_e, 1); + tmp_e = s->x3.e ^ ROR32(s->x3.o, 3); + tmp_o = s->x3.o ^ ROR32(s->x3.e, 4); + s->x3.e ^= ROR32(tmp_e, 5); + s->x3.o ^= ROR32(tmp_o, 5); + tmp_e = s->x4.e ^ ROR32(s->x4.e, 17); + tmp_o = s->x4.o ^ ROR32(s->x4.o, 17); + s->x4.e ^= ROR32(tmp_o, 3); + s->x4.o ^= ROR32(tmp_e, 4); + s->x2.e = ~s->x2.e; + s->x2.o = ~s->x2.o; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/word.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/word.h new file mode 100644 index 0000000..8ffcaaa --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowreg/word.h @@ -0,0 +1,117 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef struct { + uint32_t e; + uint32_t o; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { + return (word_t){.o = x >> 32, .e = x}; +} + +__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } + +__forceinline uint64_t TOBI32(uint64_t in); + +__forceinline uint64_t FROMBI32(uint64_t in); + +__forceinline word_t U64TOWORD(uint64_t x) { + uint64_t w = TOBI32(x); + return (word_t){.o = w >> 32, .e = w}; +} + +__forceinline 
uint64_t WORDTOU64(word_t w) { + return FROMBI32((uint64_t)w.o << 32 | w.e); +} + +#define XOR(a, b) \ + do { \ + word_t tb = b; \ + (a).e ^= tb.e; \ + (a).o ^= tb.o; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t tb = b; \ + (a).e &= tb.e; \ + (a).o &= tb.o; \ + } while (0) + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); +} + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return WORD_T((uint64_t)mask << 32 | mask); +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t TOBI32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t FROMBI32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + return (uint64_t)hi << 32 | lo; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/api.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/api.h new file mode 100644 index 0000000..ad0325e --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/api.h @@ -0,0 +1,3 @@ +#define CRYPTO_BYTES 32 +#define ASCON_RATE 8 +#define ASCON_HASH diff --git 
a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/ascon.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/ascon.h new file mode 100644 index 0000000..aa685d3 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/ascon.h @@ -0,0 +1,24 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +#define ASCON_AD 0 +#define ASCON_ENC 1 +#define ASCON_DEC 2 + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/config.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/config.h new file mode 100644 index 0000000..19426ab --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'B' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/endian.h 
b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/hash.c b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/hash.c new file mode 100644 index 0000000..34dec63 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/hash.c @@ -0,0 +1,51 @@ +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { + state_t s; + + /* initialization */ +#ifdef ASCON_HASH + s.x0 = ASCON_HASH_IV0; + s.x1 
= ASCON_HASH_IV1; + s.x2 = ASCON_HASH_IV2; + s.x3 = ASCON_HASH_IV3; + s.x4 = ASCON_HASH_IV4; +#endif +#ifdef ASCON_XOF + s.x0 = ASCON_XOF_IV0; + s.x1 = ASCON_XOF_IV1; + s.x2 = ASCON_XOF_IV2; + s.x3 = ASCON_XOF_IV3; + s.x4 = ASCON_XOF_IV4; +#endif + printstate("initialization", &s); + + /* absorb plaintext */ + while (len >= ASCON_RATE) { + XOR(s.x0, LOAD64(in)); + P12(&s); + in += ASCON_RATE; + len -= ASCON_RATE; + } + if (len) XOR(s.x0, LOAD(in, len)); + XOR(s.x0, PAD(len)); + P12(&s); + printstate("absorb plaintext", &s); + + /* squeeze output */ + len = CRYPTO_BYTES; + while (len > ASCON_RATE) { + STORE64(out, s.x0); + P12(&s); + out += ASCON_RATE; + len -= ASCON_RATE; + } + STORE64(out, s.x0); + printstate("squeeze output", &s); + + return 0; +} diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/implementors b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/loadstore.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + 
uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) 
{ + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/permutations.c b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/permutations.c new file mode 100644 index 0000000..1bca2ef --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/permutations.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/permutations.h new file mode 100644 index 0000000..ef338f1 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/permutations.h @@ -0,0 +1,189 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define 
ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV 
ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void 
P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/printstate.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/round.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/round.h new file mode 100644 index 0000000..d8ea3b6 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/round.h @@ -0,0 +1,85 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + state_t t; + /* round constant */ + s->x2.e ^= C_e; + s->x2.o ^= C_o; + /* s-box layer */ + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + s->x4.e ^= s->x3.e; + s->x4.o ^= s->x3.o; + s->x2.e 
^= s->x1.e; + s->x2.o ^= s->x1.o; + t.x0.e = s->x0.e; + t.x0.o = s->x0.o; + t.x4.e = s->x4.e; + t.x4.o = s->x4.o; + t.x3.e = s->x3.e; + t.x3.o = s->x3.o; + t.x1.e = s->x1.e; + t.x1.o = s->x1.o; + t.x2.e = s->x2.e; + t.x2.o = s->x2.o; + s->x0.e = t.x0.e ^ (~t.x1.e & t.x2.e); + s->x0.o = t.x0.o ^ (~t.x1.o & t.x2.o); + s->x2.e = t.x2.e ^ (~t.x3.e & t.x4.e); + s->x2.o = t.x2.o ^ (~t.x3.o & t.x4.o); + s->x4.e = t.x4.e ^ (~t.x0.e & t.x1.e); + s->x4.o = t.x4.o ^ (~t.x0.o & t.x1.o); + s->x1.e = t.x1.e ^ (~t.x2.e & t.x3.e); + s->x1.o = t.x1.o ^ (~t.x2.o & t.x3.o); + s->x3.e = t.x3.e ^ (~t.x4.e & t.x0.e); + s->x3.o = t.x3.o ^ (~t.x4.o & t.x0.o); + s->x1.e ^= s->x0.e; + s->x1.o ^= s->x0.o; + s->x3.e ^= s->x2.e; + s->x3.o ^= s->x2.o; + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + /* linear layer */ + t.x0.e = s->x0.e ^ ROR32(s->x0.o, 4); + t.x0.o = s->x0.o ^ ROR32(s->x0.e, 5); + t.x1.e = s->x1.e ^ ROR32(s->x1.e, 11); + t.x1.o = s->x1.o ^ ROR32(s->x1.o, 11); + t.x2.e = s->x2.e ^ ROR32(s->x2.o, 2); + t.x2.o = s->x2.o ^ ROR32(s->x2.e, 3); + t.x3.e = s->x3.e ^ ROR32(s->x3.o, 3); + t.x3.o = s->x3.o ^ ROR32(s->x3.e, 4); + t.x4.e = s->x4.e ^ ROR32(s->x4.e, 17); + t.x4.o = s->x4.o ^ ROR32(s->x4.o, 17); + s->x0.e ^= ROR32(t.x0.o, 9); + s->x0.o ^= ROR32(t.x0.e, 10); + s->x1.e ^= ROR32(t.x1.o, 19); + s->x1.o ^= ROR32(t.x1.e, 20); + s->x2.e ^= t.x2.o; + s->x2.o ^= ROR32(t.x2.e, 1); + s->x3.e ^= ROR32(t.x3.e, 5); + s->x3.o ^= ROR32(t.x3.o, 5); + s->x4.e ^= ROR32(t.x4.o, 3); + s->x4.o ^= ROR32(t.x4.e, 4); + s->x2.e = ~s->x2.e; + s->x2.o = ~s->x2.o; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/word.c b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/word.c new file mode 100644 index 0000000..0ac4e63 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/word.c @@ -0,0 +1,36 @@ +#include "word.h" + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t TOBI32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +uint64_t FROMBI32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + return (uint64_t)hi << 32 | lo; +} diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/word.h b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/word.h new file mode 100644 index 0000000..45184ca --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi32_lowsize/word.h @@ -0,0 +1,82 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef struct { + uint32_t e; + uint32_t o; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { + return 
(word_t){.o = x >> 32, .e = x}; +} + +__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } + +uint64_t TOBI32(uint64_t in); + +uint64_t FROMBI32(uint64_t in); + +__forceinline word_t U64TOWORD(uint64_t x) { + uint64_t w = TOBI32(x); + return (word_t){.o = w >> 32, .e = w}; +} + +__forceinline uint64_t WORDTOU64(word_t w) { + return FROMBI32((uint64_t)w.o << 32 | w.e); +} + +#define XOR(a, b) \ + do { \ + word_t tb = b; \ + (a).e ^= tb.e; \ + (a).o ^= tb.o; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t tb = b; \ + (a).e &= tb.e; \ + (a).o &= tb.o; \ + } while (0) + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); +} + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return WORD_T((uint64_t)mask << 32 | mask); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/bi8/api.h b/ascon/Implementations/crypto_hash/asconhashv12/bi8/api.h index ae8c7f6..ad0325e 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi8/api.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi8/api.h @@ -1 +1,3 @@ #define CRYPTO_BYTES 32 +#define ASCON_RATE 8 +#define ASCON_HASH diff --git 
a/ascon/Implementations/crypto_hash/asconhashv12/bi8/hash.c b/ascon/Implementations/crypto_hash/asconhashv12/bi8/hash.c index 10a1166..056b3fd 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/bi8/hash.c +++ b/ascon/Implementations/crypto_hash/asconhashv12/bi8/hash.c @@ -1,14 +1,8 @@ -#include "api.h" - -typedef unsigned char u8; -typedef unsigned short u16; -typedef unsigned int u32; -typedef unsigned long long u64; +#include -#define RATE (64 / 8) -#define PA_ROUNDS 12 +#include "api.h" -#define ROTR8(x, n) (((x) >> (n)) | ((x) << (8 - (n)))) +#define ROR8(x, n) (((x) >> (n)) | ((x) << (8 - (n)))) #define COMPRESS_BYTE_ARRAY_8(a, var_7, var_6, var_5, var_4, var_3, var_2, \ var_1, var_0) \ @@ -268,8 +262,9 @@ typedef unsigned long long u64; a[7] |= t1_7 & 128; \ } while (0) -// This way of implementing Ascon's S-box was inpired by personal communication -// with Joan Daemen about implementing the 3-bit chi layer. +/* This way of implementing Ascon's S-box was inpired by personal communication + */ +/* with Joan Daemen about implementing the 3-bit chi layer. 
*/ #define ROUND_16(C_7, C_6, C_5, C_4, C_3, C_2, C_1, C_0) \ do { \ /* round constant */ \ @@ -427,22 +422,22 @@ typedef unsigned long long u64; t0_5 = x0_5; \ t0_6 = x0_6; \ t0_7 = x0_7; \ - x0_5 ^= ROTR8(t0_0, 3); \ - x0_6 ^= ROTR8(t0_1, 3); \ - x0_7 ^= ROTR8(t0_2, 3); \ - x0_0 ^= ROTR8(t0_3, 2); \ - x0_1 ^= ROTR8(t0_4, 2); \ - x0_2 ^= ROTR8(t0_5, 2); \ - x0_3 ^= ROTR8(t0_6, 2); \ - x0_4 ^= ROTR8(t0_7, 2); \ - x0_4 ^= ROTR8(t0_0, 4); \ - x0_5 ^= ROTR8(t0_1, 4); \ - x0_6 ^= ROTR8(t0_2, 4); \ - x0_7 ^= ROTR8(t0_3, 4); \ - x0_0 ^= ROTR8(t0_4, 3); \ - x0_1 ^= ROTR8(t0_5, 3); \ - x0_2 ^= ROTR8(t0_6, 3); \ - x0_3 ^= ROTR8(t0_7, 3); \ + x0_5 ^= ROR8(t0_0, 3); \ + x0_6 ^= ROR8(t0_1, 3); \ + x0_7 ^= ROR8(t0_2, 3); \ + x0_0 ^= ROR8(t0_3, 2); \ + x0_1 ^= ROR8(t0_4, 2); \ + x0_2 ^= ROR8(t0_5, 2); \ + x0_3 ^= ROR8(t0_6, 2); \ + x0_4 ^= ROR8(t0_7, 2); \ + x0_4 ^= ROR8(t0_0, 4); \ + x0_5 ^= ROR8(t0_1, 4); \ + x0_6 ^= ROR8(t0_2, 4); \ + x0_7 ^= ROR8(t0_3, 4); \ + x0_0 ^= ROR8(t0_4, 3); \ + x0_1 ^= ROR8(t0_5, 3); \ + x0_2 ^= ROR8(t0_6, 3); \ + x0_3 ^= ROR8(t0_7, 3); \ t0_0 = x1_0; \ t0_1 = x1_1; \ t0_2 = x1_2; \ @@ -456,17 +451,17 @@ typedef unsigned long long u64; x1_5 ^= t0_2; \ x1_6 ^= t0_3; \ x1_7 ^= t0_4; \ - x1_0 ^= ROTR8(t0_5, 7); \ - x1_1 ^= ROTR8(t0_6, 7); \ - x1_2 ^= ROTR8(t0_7, 7); \ - x1_1 ^= ROTR8(t0_0, 5); \ - x1_2 ^= ROTR8(t0_1, 5); \ - x1_3 ^= ROTR8(t0_2, 5); \ - x1_4 ^= ROTR8(t0_3, 5); \ - x1_5 ^= ROTR8(t0_4, 5); \ - x1_6 ^= ROTR8(t0_5, 5); \ - x1_7 ^= ROTR8(t0_6, 5); \ - x1_0 ^= ROTR8(t0_7, 4); \ + x1_0 ^= ROR8(t0_5, 7); \ + x1_1 ^= ROR8(t0_6, 7); \ + x1_2 ^= ROR8(t0_7, 7); \ + x1_1 ^= ROR8(t0_0, 5); \ + x1_2 ^= ROR8(t0_1, 5); \ + x1_3 ^= ROR8(t0_2, 5); \ + x1_4 ^= ROR8(t0_3, 5); \ + x1_5 ^= ROR8(t0_4, 5); \ + x1_6 ^= ROR8(t0_5, 5); \ + x1_7 ^= ROR8(t0_6, 5); \ + x1_0 ^= ROR8(t0_7, 4); \ t0_0 = x2_0; \ t0_1 = x2_1; \ t0_2 = x2_2; \ @@ -475,7 +470,7 @@ typedef unsigned long long u64; t0_5 = x2_5; \ t0_6 = x2_6; \ t0_7 = x2_7; \ - x2_7 ^= ROTR8(t0_0, 1); \ + x2_7 
^= ROR8(t0_0, 1); \ x2_0 ^= t0_1; \ x2_1 ^= t0_2; \ x2_2 ^= t0_3; \ @@ -483,12 +478,12 @@ typedef unsigned long long u64; x2_4 ^= t0_5; \ x2_5 ^= t0_6; \ x2_6 ^= t0_7; \ - x2_2 ^= ROTR8(t0_0, 1); \ - x2_3 ^= ROTR8(t0_1, 1); \ - x2_4 ^= ROTR8(t0_2, 1); \ - x2_5 ^= ROTR8(t0_3, 1); \ - x2_6 ^= ROTR8(t0_4, 1); \ - x2_7 ^= ROTR8(t0_5, 1); \ + x2_2 ^= ROR8(t0_0, 1); \ + x2_3 ^= ROR8(t0_1, 1); \ + x2_4 ^= ROR8(t0_2, 1); \ + x2_5 ^= ROR8(t0_3, 1); \ + x2_6 ^= ROR8(t0_4, 1); \ + x2_7 ^= ROR8(t0_5, 1); \ x2_0 ^= t0_6; \ x2_1 ^= t0_7; \ t0_0 = x3_0; \ @@ -499,22 +494,22 @@ typedef unsigned long long u64; t0_5 = x3_5; \ t0_6 = x3_6; \ t0_7 = x3_7; \ - x3_6 ^= ROTR8(t0_0, 2); \ - x3_7 ^= ROTR8(t0_1, 2); \ - x3_0 ^= ROTR8(t0_2, 1); \ - x3_1 ^= ROTR8(t0_3, 1); \ - x3_2 ^= ROTR8(t0_4, 1); \ - x3_3 ^= ROTR8(t0_5, 1); \ - x3_4 ^= ROTR8(t0_6, 1); \ - x3_5 ^= ROTR8(t0_7, 1); \ - x3_7 ^= ROTR8(t0_0, 3); \ - x3_0 ^= ROTR8(t0_1, 2); \ - x3_1 ^= ROTR8(t0_2, 2); \ - x3_2 ^= ROTR8(t0_3, 2); \ - x3_3 ^= ROTR8(t0_4, 2); \ - x3_4 ^= ROTR8(t0_5, 2); \ - x3_5 ^= ROTR8(t0_6, 2); \ - x3_6 ^= ROTR8(t0_7, 2); \ + x3_6 ^= ROR8(t0_0, 2); \ + x3_7 ^= ROR8(t0_1, 2); \ + x3_0 ^= ROR8(t0_2, 1); \ + x3_1 ^= ROR8(t0_3, 1); \ + x3_2 ^= ROR8(t0_4, 1); \ + x3_3 ^= ROR8(t0_5, 1); \ + x3_4 ^= ROR8(t0_6, 1); \ + x3_5 ^= ROR8(t0_7, 1); \ + x3_7 ^= ROR8(t0_0, 3); \ + x3_0 ^= ROR8(t0_1, 2); \ + x3_1 ^= ROR8(t0_2, 2); \ + x3_2 ^= ROR8(t0_3, 2); \ + x3_3 ^= ROR8(t0_4, 2); \ + x3_4 ^= ROR8(t0_5, 2); \ + x3_5 ^= ROR8(t0_6, 2); \ + x3_6 ^= ROR8(t0_7, 2); \ t0_0 = x4_0; \ t0_1 = x4_1; \ t0_2 = x4_2; \ @@ -523,22 +518,22 @@ typedef unsigned long long u64; t0_5 = x4_5; \ t0_6 = x4_6; \ t0_7 = x4_7; \ - x4_1 ^= ROTR8(t0_0, 1); \ - x4_2 ^= ROTR8(t0_1, 1); \ - x4_3 ^= ROTR8(t0_2, 1); \ - x4_4 ^= ROTR8(t0_3, 1); \ - x4_5 ^= ROTR8(t0_4, 1); \ - x4_6 ^= ROTR8(t0_5, 1); \ - x4_7 ^= ROTR8(t0_6, 1); \ + x4_1 ^= ROR8(t0_0, 1); \ + x4_2 ^= ROR8(t0_1, 1); \ + x4_3 ^= ROR8(t0_2, 1); \ + x4_4 ^= ROR8(t0_3, 1); \ + x4_5 ^= ROR8(t0_4, 1); 
\ + x4_6 ^= ROR8(t0_5, 1); \ + x4_7 ^= ROR8(t0_6, 1); \ x4_0 ^= t0_7; \ - x4_7 ^= ROTR8(t0_0, 6); \ - x4_0 ^= ROTR8(t0_1, 5); \ - x4_1 ^= ROTR8(t0_2, 5); \ - x4_2 ^= ROTR8(t0_3, 5); \ - x4_3 ^= ROTR8(t0_4, 5); \ - x4_4 ^= ROTR8(t0_5, 5); \ - x4_5 ^= ROTR8(t0_6, 5); \ - x4_6 ^= ROTR8(t0_7, 5); \ + x4_7 ^= ROR8(t0_0, 6); \ + x4_0 ^= ROR8(t0_1, 5); \ + x4_1 ^= ROR8(t0_2, 5); \ + x4_2 ^= ROR8(t0_3, 5); \ + x4_3 ^= ROR8(t0_4, 5); \ + x4_4 ^= ROR8(t0_5, 5); \ + x4_5 ^= ROR8(t0_6, 5); \ + x4_6 ^= ROR8(t0_7, 5); \ } while (0) #define P12_8 \ @@ -557,40 +552,39 @@ typedef unsigned long long u64; ROUND_16(0, 1, 0, 0, 1, 0, 1, 1); \ } while (0) -int crypto_hash(unsigned char *out, const unsigned char *in, - unsigned long long inlen) { - u64 rlen; - u64 i; +int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t inlen) { + uint64_t rlen; + uint64_t i; - u8 buffer[8]; + uint8_t buffer[8]; - u8 x0_0, x1_0, x2_0, x3_0, x4_0; - u8 t0_0, t1_0; + uint8_t x0_0, x1_0, x2_0, x3_0, x4_0; + uint8_t t0_0, t1_0; - u8 x0_1, x1_1, x2_1, x3_1, x4_1; - u8 t0_1, t1_1; + uint8_t x0_1, x1_1, x2_1, x3_1, x4_1; + uint8_t t0_1, t1_1; - u8 x0_2, x1_2, x2_2, x3_2, x4_2; - u8 t0_2, t1_2; + uint8_t x0_2, x1_2, x2_2, x3_2, x4_2; + uint8_t t0_2, t1_2; - u8 x0_3, x1_3, x2_3, x3_3, x4_3; - u8 t0_3, t1_3; + uint8_t x0_3, x1_3, x2_3, x3_3, x4_3; + uint8_t t0_3, t1_3; - u8 x0_4, x1_4, x2_4, x3_4, x4_4; - u8 t0_4, t1_4; + uint8_t x0_4, x1_4, x2_4, x3_4, x4_4; + uint8_t t0_4, t1_4; - u8 x0_5, x1_5, x2_5, x3_5, x4_5; - u8 t0_5, t1_5; + uint8_t x0_5, x1_5, x2_5, x3_5, x4_5; + uint8_t t0_5, t1_5; - u8 x0_6, x1_6, x2_6, x3_6, x4_6; - u8 t0_6, t1_6; + uint8_t x0_6, x1_6, x2_6, x3_6, x4_6; + uint8_t t0_6, t1_6; - u8 x0_7, x1_7, x2_7, x3_7, x4_7; - u8 t0_7, t1_7; + uint8_t x0_7, x1_7, x2_7, x3_7, x4_7; + uint8_t t0_7, t1_7; - u8 in_0, in_1, in_2, in_3, in_4, in_5, in_6, in_7; + uint8_t in_0, in_1, in_2, in_3, in_4, in_5, in_6, in_7; - // initialization + /* initialization */ x0_0 = 0x4d; x0_1 = 0xdc; x0_2 = 0x85; @@ 
-632,9 +626,9 @@ int crypto_hash(unsigned char *out, const unsigned char *in, x4_6 = 0x1b; x4_7 = 0x7a; - // absorb + /* absorb */ rlen = inlen; - while (rlen >= RATE) { + while (rlen >= ASCON_RATE) { COMPRESS_BYTE_ARRAY_8(in, in_7, in_6, in_5, in_4, in_3, in_2, in_1, in_0); x0_0 ^= in_0; x0_1 ^= in_1; @@ -645,8 +639,8 @@ int crypto_hash(unsigned char *out, const unsigned char *in, x0_6 ^= in_6; x0_7 ^= in_7; P12_8; - rlen -= RATE; - in += RATE; + rlen -= ASCON_RATE; + in += ASCON_RATE; } for (i = 0; i < rlen; ++i, ++in) buffer[i] = *in; buffer[rlen] = 0x80; @@ -662,16 +656,15 @@ int crypto_hash(unsigned char *out, const unsigned char *in, x0_7 ^= in_7; P12_8; - // squeeze (full blocks) + /* squeeze (full blocks) */ rlen = CRYPTO_BYTES; - while (rlen > RATE) { + while (rlen > ASCON_RATE) { EXPAND_BYTE_ARRAY_8(out, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0); P12_8; - rlen -= RATE; - out += RATE; + rlen -= ASCON_RATE; + out += ASCON_RATE; } EXPAND_BYTE_ARRAY_8(out, x0_7, x0_6, x0_5, x0_4, x0_3, x0_2, x0_1, x0_0); return 0; } - diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64/api.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64/api.h index ae8c7f6..ad0325e 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/opt64/api.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64/api.h @@ -1 +1,3 @@ #define CRYPTO_BYTES 32 +#define ASCON_RATE 8 +#define ASCON_HASH diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64/ascon.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64/ascon.h new file mode 100644 index 0000000..10a5b6e --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void 
ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64/config.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64/config.h new file mode 100644 index 0000000..8d8a1a0 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 1 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'H' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64/endian.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64/endian.h index ffbdcd9..3944360 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/opt64/endian.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64/endian.h @@ -3,29 +3,37 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) #elif defined(_MSC_VER) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + 
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ - (((x) & 0x0000000000FF0000ULL) << 24) | (((x) & 0x00000000FF000000ULL) << 8) | \ - (((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ - (((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) #define U32BIG(x) \ - ((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ - (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) -#define U16BIG(x) \ - ((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8)) + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ - +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64/hash.c b/ascon/Implementations/crypto_hash/asconhashv12/opt64/hash.c index 5e3108e..34dec63 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/opt64/hash.c +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64/hash.c @@ -1,45 +1,51 @@ #include "api.h" -#include "endian.h" +#include "ascon.h" +#include "loadstore.h" #include "permutations.h" - -#define RATE (64 / 8) -#define PA_ROUNDS 12 
- -int crypto_hash(unsigned char* out, const unsigned char* in, - unsigned long long inlen) { - state s; - u64 outlen; - u64 i; - - // initialization - s.x0 = 0xee9398aadb67f03dull; - s.x1 = 0x8bb21831c60f1002ull; - s.x2 = 0xb48a92db98d5da62ull; - s.x3 = 0x43189921b8f8e3e8ull; - s.x4 = 0x348fa5c9d525e140ull; - - // absorb plaintext - while (inlen >= RATE) { - s.x0 ^= U64BIG(*(u64*)in); - P12(); - inlen -= RATE; - in += RATE; +#include "printstate.h" + +int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { + state_t s; + + /* initialization */ +#ifdef ASCON_HASH + s.x0 = ASCON_HASH_IV0; + s.x1 = ASCON_HASH_IV1; + s.x2 = ASCON_HASH_IV2; + s.x3 = ASCON_HASH_IV3; + s.x4 = ASCON_HASH_IV4; +#endif +#ifdef ASCON_XOF + s.x0 = ASCON_XOF_IV0; + s.x1 = ASCON_XOF_IV1; + s.x2 = ASCON_XOF_IV2; + s.x3 = ASCON_XOF_IV3; + s.x4 = ASCON_XOF_IV4; +#endif + printstate("initialization", &s); + + /* absorb plaintext */ + while (len >= ASCON_RATE) { + XOR(s.x0, LOAD64(in)); + P12(&s); + in += ASCON_RATE; + len -= ASCON_RATE; } - for (i = 0; i < inlen; ++i, ++in) s.x0 ^= INS_BYTE64(*in, i); - s.x0 ^= INS_BYTE64(0x80, inlen); - - P12(); - - // absorb plaintext - outlen = CRYPTO_BYTES; - while (outlen > RATE) { - *(u64*)out = U64BIG(s.x0); - P12(); - outlen -= RATE; - out += RATE; + if (len) XOR(s.x0, LOAD(in, len)); + XOR(s.x0, PAD(len)); + P12(&s); + printstate("absorb plaintext", &s); + + /* squeeze output */ + len = CRYPTO_BYTES; + while (len > ASCON_RATE) { + STORE64(out, s.x0); + P12(&s); + out += ASCON_RATE; + len -= ASCON_RATE; } - *(u64*)out = U64BIG(s.x0); + STORE64(out, s.x0); + printstate("squeeze output", &s); return 0; } - diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64/loadstore.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + 
+#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; 
i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64/permutations.c b/ascon/Implementations/crypto_hash/asconhashv12/opt64/permutations.c new file mode 100644 index 0000000..3aecaa6 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64/permutations.c @@ -0,0 +1,26 @@ +#include "permutations.h" + +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64/permutations.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64/permutations.h index 4af1e2c..6172dd5 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/opt64/permutations.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64/permutations.h @@ -1,102 +1,184 @@ #ifndef PERMUTATIONS_H_ #define 
PERMUTATIONS_H_ -typedef unsigned char u8; -typedef unsigned long long u64; - -typedef struct { - u64 x0, x1, x2, x3, x4; -} state; - -#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) -#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) - -#define ROUND(C) \ - do { \ - state t; \ - s.x2 ^= C; \ - s.x0 ^= s.x4; \ - s.x4 ^= s.x3; \ - s.x2 ^= s.x1; \ - t.x0 = s.x0; \ - t.x4 = s.x4; \ - t.x3 = s.x3; \ - t.x1 = s.x1; \ - t.x2 = s.x2; \ - s.x0 = t.x0 ^ ((~t.x1) & t.x2); \ - s.x2 = t.x2 ^ ((~t.x3) & t.x4); \ - s.x4 = t.x4 ^ ((~t.x0) & t.x1); \ - s.x1 = t.x1 ^ ((~t.x2) & t.x3); \ - s.x3 = t.x3 ^ ((~t.x4) & t.x0); \ - s.x1 ^= s.x0; \ - t.x1 = s.x1; \ - s.x1 = ROTR64(s.x1, 39); \ - s.x3 ^= s.x2; \ - t.x2 = s.x2; \ - s.x2 = ROTR64(s.x2, 1); \ - t.x4 = s.x4; \ - t.x2 ^= s.x2; \ - s.x2 = ROTR64(s.x2, 6 - 1); \ - t.x3 = s.x3; \ - t.x1 ^= s.x1; \ - s.x3 = ROTR64(s.x3, 10); \ - s.x0 ^= s.x4; \ - s.x4 = ROTR64(s.x4, 7); \ - t.x3 ^= s.x3; \ - s.x2 ^= t.x2; \ - s.x1 = ROTR64(s.x1, 61 - 39); \ - t.x0 = s.x0; \ - s.x2 = ~s.x2; \ - s.x3 = ROTR64(s.x3, 17 - 10); \ - t.x4 ^= s.x4; \ - s.x4 = ROTR64(s.x4, 41 - 7); \ - s.x3 ^= t.x3; \ - s.x1 ^= t.x1; \ - s.x0 = ROTR64(s.x0, 19); \ - s.x4 ^= t.x4; \ - t.x0 ^= s.x0; \ - s.x0 = ROTR64(s.x0, 28 - 19); \ - s.x0 ^= t.x0; \ - } while (0) - -#define P12() \ - do { \ - ROUND(0xf0); \ - ROUND(0xe1); \ - ROUND(0xd2); \ - ROUND(0xc3); \ - ROUND(0xb4); \ - ROUND(0xa5); \ - ROUND(0x96); \ - ROUND(0x87); \ - ROUND(0x78); \ - ROUND(0x69); \ - ROUND(0x5a); \ - ROUND(0x4b); \ - } while (0) - -#define P8() \ - do { \ - ROUND(0xb4); \ - ROUND(0xa5); \ - ROUND(0x96); \ - ROUND(0x87); \ - ROUND(0x78); \ - ROUND(0x69); \ - ROUND(0x5a); \ - ROUND(0x4b); \ - } while (0) - -#define P6() \ - do { \ - ROUND(0x96); \ - ROUND(0x87); \ - ROUND(0x78); \ - ROUND(0x69); \ - ROUND(0x5a); \ - ROUND(0x4b); \ - } while (0) - -#endif // PERMUTATIONS_H_ +#include +#include "api.h" +#include "ascon.h" 
+#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if 
ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + 
+__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64/printstate.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64/round.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64/round.h new file mode 100644 index 0000000..077cbfd --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64/round.h @@ -0,0 +1,69 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint64_t C) { + state_t t; + s->x2 ^= C; + s->x0 ^= s->x4; + s->x4 ^= s->x3; + s->x2 ^= s->x1; + t.x0 = s->x0; + t.x4 = s->x4; + t.x3 = s->x3; + t.x1 = s->x1; + t.x2 = s->x2; + s->x0 = t.x0 ^ (~t.x1 & t.x2); + s->x2 = t.x2 ^ (~t.x3 & t.x4); + s->x4 = t.x4 ^ (~t.x0 & t.x1); + s->x1 = t.x1 ^ (~t.x2 & 
t.x3); + s->x3 = t.x3 ^ (~t.x4 & t.x0); + s->x1 ^= s->x0; + t.x1 = s->x1; + s->x1 = ROR64(s->x1, 39); + s->x3 ^= s->x2; + t.x2 = s->x2; + s->x2 = ROR64(s->x2, 1); + t.x4 = s->x4; + t.x2 ^= s->x2; + s->x2 = ROR64(s->x2, 6 - 1); + t.x3 = s->x3; + t.x1 ^= s->x1; + s->x3 = ROR64(s->x3, 10); + s->x0 ^= s->x4; + s->x4 = ROR64(s->x4, 7); + t.x3 ^= s->x3; + s->x2 ^= t.x2; + s->x1 = ROR64(s->x1, 61 - 39); + t.x0 = s->x0; + s->x2 = ~s->x2; + s->x3 = ROR64(s->x3, 17 - 10); + t.x4 ^= s->x4; + s->x4 = ROR64(s->x4, 41 - 7); + s->x3 ^= t.x3; + s->x1 ^= t.x1; + s->x0 = ROR64(s->x0, 19); + s->x4 ^= t.x4; + t.x0 ^= s->x0; + s->x0 = ROR64(s->x0, 28 - 19); + s->x0 ^= t.x0; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64/word.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64/word.h new file mode 100644 index 0000000..5d601bb --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64/word.h @@ -0,0 +1,47 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef uint64_t word_t; + +#define WORD_T +#define UINT64_T + +#define U64TOWORD +#define WORDTOU64 + +#define XOR(a, b) \ + do { \ + (a) ^= (b); \ + } while (0) + +#define AND(a, b) \ + do { \ + (a) &= (b); \ + } while (0) + +__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + return lo2hi << 32 | hi2lo >> 32; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +} + +#endif /* WORD_H_ */ diff --git 
a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/api.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/api.h new file mode 100644 index 0000000..ad0325e --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/api.h @@ -0,0 +1,3 @@ +#define CRYPTO_BYTES 32 +#define ASCON_RATE 8 +#define ASCON_HASH diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/ascon.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/ascon.h new file mode 100644 index 0000000..aa685d3 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/ascon.h @@ -0,0 +1,24 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +#define ASCON_AD 0 +#define ASCON_ENC 1 +#define ASCON_DEC 2 + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/config.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/config.h new file mode 100644 index 0000000..ec8bd6f --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ 
+#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'H' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/endian.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/hash.c b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/hash.c new file mode 100644 index 0000000..34dec63 --- /dev/null +++ 
b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/hash.c @@ -0,0 +1,51 @@ +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { + state_t s; + + /* initialization */ +#ifdef ASCON_HASH + s.x0 = ASCON_HASH_IV0; + s.x1 = ASCON_HASH_IV1; + s.x2 = ASCON_HASH_IV2; + s.x3 = ASCON_HASH_IV3; + s.x4 = ASCON_HASH_IV4; +#endif +#ifdef ASCON_XOF + s.x0 = ASCON_XOF_IV0; + s.x1 = ASCON_XOF_IV1; + s.x2 = ASCON_XOF_IV2; + s.x3 = ASCON_XOF_IV3; + s.x4 = ASCON_XOF_IV4; +#endif + printstate("initialization", &s); + + /* absorb plaintext */ + while (len >= ASCON_RATE) { + XOR(s.x0, LOAD64(in)); + P12(&s); + in += ASCON_RATE; + len -= ASCON_RATE; + } + if (len) XOR(s.x0, LOAD(in, len)); + XOR(s.x0, PAD(len)); + P12(&s); + printstate("absorb plaintext", &s); + + /* squeeze output */ + len = CRYPTO_BYTES; + while (len > ASCON_RATE) { + STORE64(out, s.x0); + P12(&s); + out += ASCON_RATE; + len -= ASCON_RATE; + } + STORE64(out, s.x0); + printstate("squeeze output", &s); + + return 0; +} diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/implementors b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/loadstore.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + 
+/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define 
LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/permutations.c b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/permutations.c new file mode 100644 index 0000000..3aecaa6 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/permutations.c @@ -0,0 +1,26 @@ +#include "permutations.h" + +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/permutations.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/permutations.h new file mode 100644 index 0000000..6172dd5 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/permutations.h @@ -0,0 +1,184 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 
+#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define 
PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i 
== 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/printstate.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/round.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/round.h new file mode 100644 index 0000000..077cbfd --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/round.h @@ -0,0 +1,69 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint64_t C) { + state_t t; + s->x2 ^= C; + s->x0 ^= s->x4; + s->x4 ^= s->x3; + s->x2 ^= s->x1; + t.x0 = s->x0; + t.x4 = s->x4; + t.x3 = s->x3; + t.x1 = s->x1; + t.x2 = s->x2; + s->x0 = t.x0 ^ (~t.x1 & t.x2); + s->x2 = t.x2 ^ (~t.x3 & t.x4); + s->x4 = t.x4 ^ (~t.x0 & t.x1); + s->x1 = t.x1 ^ (~t.x2 & t.x3); + s->x3 = t.x3 ^ 
(~t.x4 & t.x0); + s->x1 ^= s->x0; + t.x1 = s->x1; + s->x1 = ROR64(s->x1, 39); + s->x3 ^= s->x2; + t.x2 = s->x2; + s->x2 = ROR64(s->x2, 1); + t.x4 = s->x4; + t.x2 ^= s->x2; + s->x2 = ROR64(s->x2, 6 - 1); + t.x3 = s->x3; + t.x1 ^= s->x1; + s->x3 = ROR64(s->x3, 10); + s->x0 ^= s->x4; + s->x4 = ROR64(s->x4, 7); + t.x3 ^= s->x3; + s->x2 ^= t.x2; + s->x1 = ROR64(s->x1, 61 - 39); + t.x0 = s->x0; + s->x2 = ~s->x2; + s->x3 = ROR64(s->x3, 17 - 10); + t.x4 ^= s->x4; + s->x4 = ROR64(s->x4, 41 - 7); + s->x3 ^= t.x3; + s->x1 ^= t.x1; + s->x0 = ROR64(s->x0, 19); + s->x4 ^= t.x4; + t.x0 ^= s->x0; + s->x0 = ROR64(s->x0, 28 - 19); + s->x0 ^= t.x0; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/word.h b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/word.h new file mode 100644 index 0000000..5d601bb --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/opt64_lowsize/word.h @@ -0,0 +1,47 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef uint64_t word_t; + +#define WORD_T +#define UINT64_T + +#define U64TOWORD +#define WORDTOU64 + +#define XOR(a, b) \ + do { \ + (a) ^= (b); \ + } while (0) + +#define AND(a, b) \ + do { \ + (a) &= (b); \ + } while (0) + +__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + return lo2hi << 32 | hi2lo >> 32; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +} + +#endif /* WORD_H_ */ diff --git 
a/ascon/Implementations/crypto_hash/asconhashv12/ref/ascon.h b/ascon/Implementations/crypto_hash/asconhashv12/ref/ascon.h new file mode 100644 index 0000000..8ab0502 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/ref/ascon.h @@ -0,0 +1,14 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#define WORDTOU64 + +typedef uint64_t word_t; + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/ref/hash.c b/ascon/Implementations/crypto_hash/asconhashv12/ref/hash.c index ed4f520..61dea5c 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/ref/hash.c +++ b/ascon/Implementations/crypto_hash/asconhashv12/ref/hash.c @@ -1,52 +1,43 @@ #include "api.h" +#include "ascon.h" +#include "loadstore.h" #include "permutations.h" +#include "printstate.h" -#define RATE (64 / 8) -#define PA_ROUNDS 12 -#define IV \ - ((u64)(8 * (RATE)) << 48 | (u64)(PA_ROUNDS) << 40 | \ - (u64)(8 * (CRYPTO_BYTES)) << 0) +int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { + state_t s; -int crypto_hash(unsigned char *out, const unsigned char *in, - unsigned long long inlen) { - state s; - u64 outlen; - - // initialization - s.x0 = IV; + /* initialization */ + s.x0 = ASCON_HASH_IV; s.x1 = 0; s.x2 = 0; s.x3 = 0; s.x4 = 0; - printstate("initial value:", s); P12(&s); - printstate("initialization:", s); + printstate("initialization", &s); - // absorb plaintext - inlen = inlen; - while (inlen >= RATE) { - s.x0 ^= BYTES_TO_U64(in, 8); + /* absorb plaintext */ + while (len >= ASCON_128_RATE) { + s.x0 ^= LOAD(in, 8); P12(&s); - inlen -= RATE; - in += RATE; + in += ASCON_128_RATE; + len -= ASCON_128_RATE; } - s.x0 ^= BYTES_TO_U64(in, inlen); - s.x0 ^= 0x80ull << (56 - 8 * inlen); - printstate("absorb plaintext:", s); - + s.x0 ^= LOAD(in, len); + s.x0 ^= PAD(len); P12(&s); - printstate("finalization:", s); + printstate("absorb plaintext", &s); - // set hash output - outlen = CRYPTO_BYTES; 
- while (outlen > RATE) { - U64_TO_BYTES(out, s.x0, 8); + /* squeeze output */ + len = CRYPTO_BYTES; + while (len > ASCON_128_RATE) { + STORE(out, s.x0, 8); P12(&s); - outlen -= RATE; - out += RATE; + out += ASCON_128_RATE; + len -= ASCON_128_RATE; } - U64_TO_BYTES(out, s.x0, 8); + STORE(out, s.x0, 8); + printstate("squeeze output", &s); return 0; } - diff --git a/ascon/Implementations/crypto_hash/asconhashv12/ref/loadstore.h b/ascon/Implementations/crypto_hash/asconhashv12/ref/loadstore.h new file mode 100644 index 0000000..eccf0e3 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/ref/loadstore.h @@ -0,0 +1,31 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +/* set padding byte in Ascon 64-bit word */ +#define PAD(i) SETBYTE(0x80, i) + +static inline uint64_t MASK(int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) x |= SETBYTE(0xff, i); + return x; +} + +static inline uint64_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) x |= SETBYTE(bytes[i], i); + return x; +} + +static inline void STORE(uint8_t* bytes, uint64_t x, int n) { + for (int i = 0; i < n; ++i) bytes[i] = GETBYTE(x, i); +} + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/ref/permutations.h b/ascon/Implementations/crypto_hash/asconhashv12/ref/permutations.h index 3317a9f..ef6dd68 100644 --- a/ascon/Implementations/crypto_hash/asconhashv12/ref/permutations.h +++ b/ascon/Implementations/crypto_hash/asconhashv12/ref/permutations.h @@ -1,132 +1,88 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -#ifdef DEBUG -#include -#endif +#include -typedef unsigned char u8; -typedef unsigned long long u64; +#include "ascon.h" +#include "printstate.h" +#include "round.h" -typedef struct { - u64 x0, x1, 
x2, x3, x4; -} state; +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 -static inline void printstate(const char* text, const state s) { -#ifdef DEBUG - printf("%s\n", text); - printf(" x0=%016llx\n", s.x0); - printf(" x1=%016llx\n", s.x1); - printf(" x2=%016llx\n", s.x2); - printf(" x3=%016llx\n", s.x3); - printf(" x4=%016llx\n", s.x4); -#else - // disable warning about unused parameters - (void)text; - (void)s; -#endif -} +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 -static inline u64 BYTES_TO_U64(const u8* bytes, int n) { - int i; - u64 x = 0; - for (i = 0; i < n; i++) x |= ((u64)bytes[i]) << (56 - 8 * i); - return x; -} +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 -static inline void U64_TO_BYTES(u8* bytes, const u64 x, int n) { - int i; - for (i = 0; i < n; i++) bytes[i] = (u8)(x >> (56 - 8 * i)); -} +#define ASCON_HASH_BYTES 32 -static inline u64 BYTE_MASK(int n) { - int i; - u64 x = 0; - for (i = 0; i < n; i++) x |= 0xffull << (56 - 8 * i); - return x; -} +#define ASCON_128_IV \ + (((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) -static inline u64 ROTR64(u64 x, int n) { return (x << (64 - n)) | (x >> n); } +#define ASCON_128A_IV \ + (((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) -static inline void ROUND(u8 C, state* p) { - state s = *p; - state t; - // addition of round constant - s.x2 ^= C; - printstate(" addition of round constant:", s); - // substitution layer - s.x0 ^= s.x4; - s.x4 ^= s.x3; - s.x2 ^= s.x1; - // start of keccak s-box - t.x0 = ~s.x0; - t.x1 = ~s.x1; - t.x2 = ~s.x2; - t.x3 = ~s.x3; - t.x4 = ~s.x4; - t.x0 &= s.x1; - t.x1 &= s.x2; - t.x2 &= s.x3; - t.x3 &= s.x4; - t.x4 &= 
s.x0; - s.x0 ^= t.x1; - s.x1 ^= t.x2; - s.x2 ^= t.x3; - s.x3 ^= t.x4; - s.x4 ^= t.x0; - // end of keccak s-box - s.x1 ^= s.x0; - s.x0 ^= s.x4; - s.x3 ^= s.x2; - s.x2 = ~s.x2; - printstate(" substitution layer:", s); - // linear diffusion layer - s.x0 ^= ROTR64(s.x0, 19) ^ ROTR64(s.x0, 28); - s.x1 ^= ROTR64(s.x1, 61) ^ ROTR64(s.x1, 39); - s.x2 ^= ROTR64(s.x2, 1) ^ ROTR64(s.x2, 6); - s.x3 ^= ROTR64(s.x3, 10) ^ ROTR64(s.x3, 17); - s.x4 ^= ROTR64(s.x4, 7) ^ ROTR64(s.x4, 41); - printstate(" linear diffusion layer:", s); - *p = s; -} +#define ASCON_80PQ_IV \ + (((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) -static inline void P12(state* s) { - printstate(" permutation input:", *s); - ROUND(0xf0, s); - ROUND(0xe1, s); - ROUND(0xd2, s); - ROUND(0xc3, s); - ROUND(0xb4, s); - ROUND(0xa5, s); - ROUND(0x96, s); - ROUND(0x87, s); - ROUND(0x78, s); - ROUND(0x69, s); - ROUND(0x5a, s); - ROUND(0x4b, s); -} +#define ASCON_HASH_IV \ + (((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) -static inline void P8(state* s) { - printstate(" permutation input:", *s); - ROUND(0xb4, s); - ROUND(0xa5, s); - ROUND(0x96, s); - ROUND(0x87, s); - ROUND(0x78, s); - ROUND(0x69, s); - ROUND(0x5a, s); - ROUND(0x4b, s); +#define ASCON_XOF_IV \ + (((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +static inline void P12(state_t* s) { + printstate(" permutation input", s); + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); } -static inline void P6(state* s) { - printstate(" permutation input:", *s); - ROUND(0x96, s); - ROUND(0x87, s); - ROUND(0x78, s); - ROUND(0x69, s); - ROUND(0x5a, s); - 
ROUND(0x4b, s); +static inline void P8(state_t* s) { + printstate(" permutation input", s); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); } -#endif // PERMUTATIONS_H_ +static inline void P6(state_t* s) { + printstate(" permutation input", s); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/ref/printstate.h b/ascon/Implementations/crypto_hash/asconhashv12/ref/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/ref/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconhashv12/ref/round.h b/ascon/Implementations/crypto_hash/asconhashv12/ref/round.h new file mode 100644 index 0000000..dcb6c81 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconhashv12/ref/round.h @@ -0,0 +1,40 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +static inline uint64_t ROR64(uint64_t x, int n) { + return (x << (64 - n)) | (x >> n); +} + +static inline void ROUND(state_t* s, uint8_t C) { + state_t t; + /* addition of round constant */ + s->x2 ^= C; + /* substitution layer */ + s->x0 ^= s->x4; + s->x4 ^= s->x3; + s->x2 ^= s->x1; + /* start 
of keccak s-box */ + t.x0 = s->x0 ^ (~s->x1 & s->x2); + t.x1 = s->x1 ^ (~s->x2 & s->x3); + t.x2 = s->x2 ^ (~s->x3 & s->x4); + t.x3 = s->x3 ^ (~s->x4 & s->x0); + t.x4 = s->x4 ^ (~s->x0 & s->x1); + /* end of keccak s-box */ + t.x1 ^= t.x0; + t.x0 ^= t.x4; + t.x3 ^= t.x2; + t.x2 = ~t.x2; + /* printstate(" substitution layer", &t); */ + /* linear diffusion layer */ + s->x0 = t.x0 ^ ROR64(t.x0, 19) ^ ROR64(t.x0, 28); + s->x1 = t.x1 ^ ROR64(t.x1, 61) ^ ROR64(t.x1, 39); + s->x2 = t.x2 ^ ROR64(t.x2, 1) ^ ROR64(t.x2, 6); + s->x3 = t.x3 ^ ROR64(t.x3, 10) ^ ROR64(t.x3, 17); + s->x4 = t.x4 ^ ROR64(t.x4, 7) ^ ROR64(t.x4, 41); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32/api.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32/api.h new file mode 100644 index 0000000..d72f706 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/api.h @@ -0,0 +1,3 @@ +#define CRYPTO_BYTES 32 +#define ASCON_RATE 8 +#define ASCON_XOF diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32/ascon.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32/ascon.h new file mode 100644 index 0000000..10a5b6e --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32/config.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32/config.h new file mode 100644 index 0000000..b1b5080 --- /dev/null 
+++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 0 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'M' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32/endian.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + 
((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32/hash.c b/ascon/Implementations/crypto_hash/asconxofv12/bi32/hash.c new file mode 100644 index 0000000..34dec63 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/hash.c @@ -0,0 +1,51 @@ +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { + state_t s; + + /* initialization */ +#ifdef ASCON_HASH + s.x0 = ASCON_HASH_IV0; + s.x1 = ASCON_HASH_IV1; + s.x2 = ASCON_HASH_IV2; + s.x3 = ASCON_HASH_IV3; + s.x4 = ASCON_HASH_IV4; +#endif +#ifdef ASCON_XOF + s.x0 = ASCON_XOF_IV0; + s.x1 = ASCON_XOF_IV1; + s.x2 = ASCON_XOF_IV2; + s.x3 = ASCON_XOF_IV3; + s.x4 = ASCON_XOF_IV4; +#endif + printstate("initialization", &s); + + /* absorb plaintext */ + while (len >= ASCON_RATE) { + XOR(s.x0, LOAD64(in)); + P12(&s); + in += ASCON_RATE; + len -= ASCON_RATE; + } + if (len) XOR(s.x0, LOAD(in, len)); + XOR(s.x0, PAD(len)); + P12(&s); + printstate("absorb plaintext", &s); + + /* squeeze output */ + len = CRYPTO_BYTES; + while (len > ASCON_RATE) { + STORE64(out, s.x0); + P12(&s); + out += ASCON_RATE; + len -= ASCON_RATE; + } + STORE64(out, s.x0); + printstate("squeeze output", &s); + + return 0; +} diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32/implementors b/ascon/Implementations/crypto_hash/asconxofv12/bi32/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git 
a/ascon/Implementations/crypto_hash/asconxofv12/bi32/loadstore.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define 
STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32/permutations.c b/ascon/Implementations/crypto_hash/asconxofv12/bi32/permutations.c new file mode 100644 index 0000000..1bca2ef --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/permutations.c @@ -0,0 +1,35 @@ +#include "permutations.h" + +#include "round.h" + +#if !ASCON_UNROLL_LOOPS || ASCON_SINGLE_PERM + +const uint8_t constants[][2] = {{0xc, 0xc}, {0x9, 0xc}, {0xc, 0x9}, {0x9, 0x9}, + {0x6, 0xc}, {0x3, 0xc}, {0x6, 0x9}, {0x3, 0x9}, + {0xc, 0x6}, {0x9, 0x6}, {0xc, 0x3}, {0x9, 0x3}}; + +#endif + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { 
P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32/permutations.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32/permutations.h new file mode 100644 index 0000000..ef338f1 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/permutations.h @@ -0,0 +1,189 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 
U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) (12 - n) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xc, 0xc); + ROUND(s, 0x9, 0xc); + ROUND(s, 0xc, 0x9); + ROUND(s, 0x9, 0x9); + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0x6, 0xc); + ROUND(s, 0x3, 0xc); + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x6, 0x9); + ROUND(s, 0x3, 0x9); + ROUND(s, 0xc, 0x6); + ROUND(s, 0x9, 0x6); + ROUND(s, 0xc, 0x3); + ROUND(s, 0x9, 0x3); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +extern const uint8_t constants[][2]; + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + 
+__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i < 12; i++) + ROUND(s, constants[i][0], constants[i][1]); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32/printstate.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32/round.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32/round.h new file mode 
100644 index 0000000..d8ea3b6 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/round.h @@ -0,0 +1,85 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint32_t C_e, uint32_t C_o) { + state_t t; + /* round constant */ + s->x2.e ^= C_e; + s->x2.o ^= C_o; + /* s-box layer */ + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + s->x4.e ^= s->x3.e; + s->x4.o ^= s->x3.o; + s->x2.e ^= s->x1.e; + s->x2.o ^= s->x1.o; + t.x0.e = s->x0.e; + t.x0.o = s->x0.o; + t.x4.e = s->x4.e; + t.x4.o = s->x4.o; + t.x3.e = s->x3.e; + t.x3.o = s->x3.o; + t.x1.e = s->x1.e; + t.x1.o = s->x1.o; + t.x2.e = s->x2.e; + t.x2.o = s->x2.o; + s->x0.e = t.x0.e ^ (~t.x1.e & t.x2.e); + s->x0.o = t.x0.o ^ (~t.x1.o & t.x2.o); + s->x2.e = t.x2.e ^ (~t.x3.e & t.x4.e); + s->x2.o = t.x2.o ^ (~t.x3.o & t.x4.o); + s->x4.e = t.x4.e ^ (~t.x0.e & t.x1.e); + s->x4.o = t.x4.o ^ (~t.x0.o & t.x1.o); + s->x1.e = t.x1.e ^ (~t.x2.e & t.x3.e); + s->x1.o = t.x1.o ^ (~t.x2.o & t.x3.o); + s->x3.e = t.x3.e ^ (~t.x4.e & t.x0.e); + s->x3.o = t.x3.o ^ (~t.x4.o & t.x0.o); + s->x1.e ^= s->x0.e; + s->x1.o ^= s->x0.o; + s->x3.e ^= s->x2.e; + s->x3.o ^= s->x2.o; + s->x0.e ^= s->x4.e; + s->x0.o ^= s->x4.o; + /* linear layer */ + t.x0.e = s->x0.e ^ ROR32(s->x0.o, 4); + t.x0.o = s->x0.o ^ ROR32(s->x0.e, 5); + t.x1.e = s->x1.e ^ ROR32(s->x1.e, 11); + t.x1.o = s->x1.o ^ ROR32(s->x1.o, 11); + t.x2.e = s->x2.e ^ ROR32(s->x2.o, 2); + t.x2.o = s->x2.o ^ ROR32(s->x2.e, 3); + t.x3.e = s->x3.e ^ ROR32(s->x3.o, 3); + t.x3.o = s->x3.o ^ ROR32(s->x3.e, 4); + t.x4.e = s->x4.e ^ ROR32(s->x4.e, 17); + t.x4.o = s->x4.o ^ ROR32(s->x4.o, 17); + s->x0.e ^= ROR32(t.x0.o, 9); + s->x0.o ^= 
ROR32(t.x0.e, 10); + s->x1.e ^= ROR32(t.x1.o, 19); + s->x1.o ^= ROR32(t.x1.e, 20); + s->x2.e ^= t.x2.o; + s->x2.o ^= ROR32(t.x2.e, 1); + s->x3.e ^= ROR32(t.x3.e, 5); + s->x3.o ^= ROR32(t.x3.o, 5); + s->x4.e ^= ROR32(t.x4.o, 3); + s->x4.o ^= ROR32(t.x4.e, 4); + s->x2.e = ~s->x2.e; + s->x2.o = ~s->x2.o; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/bi32/word.h b/ascon/Implementations/crypto_hash/asconxofv12/bi32/word.h new file mode 100644 index 0000000..8ffcaaa --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/bi32/word.h @@ -0,0 +1,117 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef struct { + uint32_t e; + uint32_t o; +} word_t; + +__forceinline word_t WORD_T(uint64_t x) { + return (word_t){.o = x >> 32, .e = x}; +} + +__forceinline uint64_t UINT64_T(word_t x) { return (uint64_t)x.o << 32 | x.e; } + +__forceinline uint64_t TOBI32(uint64_t in); + +__forceinline uint64_t FROMBI32(uint64_t in); + +__forceinline word_t U64TOWORD(uint64_t x) { + uint64_t w = TOBI32(x); + return (word_t){.o = w >> 32, .e = w}; +} + +__forceinline uint64_t WORDTOU64(word_t w) { + return FROMBI32((uint64_t)w.o << 32 | w.e); +} + +#define XOR(a, b) \ + do { \ + word_t tb = b; \ + (a).e ^= tb.e; \ + (a).o ^= tb.o; \ + } while (0) + +#define AND(a, b) \ + do { \ + word_t tb = b; \ + (a).e &= tb.e; \ + (a).o &= tb.o; \ + } while (0) + +__forceinline uint32_t ROR32(uint32_t x, int n) { + return x >> n | x << (32 - n); +} + +__forceinline word_t ROR64(word_t x, int n) { + word_t r; + r.e = (n % 2) ? ROR32(x.o, (n - 1) / 2) : ROR32(x.e, n / 2); + r.o = (n % 2) ? 
ROR32(x.e, (n + 1) / 2) : ROR32(x.o, n / 2); + return r; +} + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + word_t r; + r.o = lo2hi.o << 16 | hi2lo.o >> 16; + r.e = lo2hi.e << 16 | hi2lo.e >> 16; + return r; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { + return WORD_T((uint64_t)(0x08 << (28 - 4 * i)) << 32); +} + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + uint32_t mask = 0x0fffffff >> (n * 4 - 4); + return WORD_T((uint64_t)mask << 32 | mask); +} + +/* credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t TOBI32(uint64_t in) { + uint32_t hi = in >> 32; + uint32_t lo = in; + uint32_t r0, r1; + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r0 = (lo & 0x0000FFFF) | (hi << 16); + r1 = (lo >> 16) | (hi & 0xFFFF0000); + return (uint64_t)r1 << 32 | r0; +} + +/* credit to Henry S. 
Warren, Hacker's Delight, Addison-Wesley, 2002 */ +__forceinline uint64_t FROMBI32(uint64_t in) { + uint32_t r0 = in; + uint32_t r1 = in >> 32; + uint32_t lo = (r0 & 0x0000FFFF) | (r1 << 16); + uint32_t hi = (r0 >> 16) | (r1 & 0xFFFF0000); + r0 = (lo ^ (lo >> 8)) & 0x0000FF00, lo ^= r0 ^ (r0 << 8); + r0 = (lo ^ (lo >> 4)) & 0x00F000F0, lo ^= r0 ^ (r0 << 4); + r0 = (lo ^ (lo >> 2)) & 0x0C0C0C0C, lo ^= r0 ^ (r0 << 2); + r0 = (lo ^ (lo >> 1)) & 0x22222222, lo ^= r0 ^ (r0 << 1); + r1 = (hi ^ (hi >> 8)) & 0x0000FF00, hi ^= r1 ^ (r1 << 8); + r1 = (hi ^ (hi >> 4)) & 0x00F000F0, hi ^= r1 ^ (r1 << 4); + r1 = (hi ^ (hi >> 2)) & 0x0C0C0C0C, hi ^= r1 ^ (r1 << 2); + r1 = (hi ^ (hi >> 1)) & 0x22222222, hi ^= r1 ^ (r1 << 1); + return (uint64_t)hi << 32 | lo; +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64/api.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64/api.h index ae8c7f6..d72f706 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/opt64/api.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64/api.h @@ -1 +1,3 @@ #define CRYPTO_BYTES 32 +#define ASCON_RATE 8 +#define ASCON_XOF diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64/ascon.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64/ascon.h new file mode 100644 index 0000000..10a5b6e --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64/ascon.h @@ -0,0 +1,19 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +void ascon_init(state_t* s, const uint8_t* npub, const uint8_t* k); +void ascon_absorb(state_t* s, const uint8_t* ad, uint64_t adlen); +void ascon_encrypt(state_t* s, uint8_t* c, const uint8_t* m, uint64_t mlen); +void ascon_decrypt(state_t* s, uint8_t* m, const uint8_t* c, uint64_t clen); +void ascon_final(state_t* s, const uint8_t* k); + +#endif // ASCON_H_ diff --git 
a/ascon/Implementations/crypto_hash/asconxofv12/opt64/config.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64/config.h new file mode 100644 index 0000000..8d8a1a0 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 1 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 0 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'H' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + +#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64/endian.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64/endian.h index ffbdcd9..3944360 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/opt64/endian.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64/endian.h @@ -3,29 +3,37 @@ #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -// macros for big endian machines +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif #define U64BIG(x) (x) #define U32BIG(x) (x) #define U16BIG(x) (x) #elif defined(_MSC_VER) || \ - (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -// macros for little endian machines -#define U64BIG(x) \ - ((((x) & 0x00000000000000FFULL) << 56) | (((x) & 0x000000000000FF00ULL) << 40) | \ - (((x) & 0x0000000000FF0000ULL) << 24) | 
(((x) & 0x00000000FF000000ULL) << 8) | \ - (((x) & 0x000000FF00000000ULL) >> 8) | (((x) & 0x0000FF0000000000ULL) >> 24) | \ - (((x) & 0x00FF000000000000ULL) >> 40) | (((x) & 0xFF00000000000000ULL) >> 56)) +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) #define U32BIG(x) \ - ((((x) & 0x000000FF) << 24) | (((x) & 0x0000FF00) << 8) | \ - (((x) & 0x00FF0000) >> 8) | (((x) & 0xFF000000) >> 24)) -#define U16BIG(x) \ - ((((x) & 0x00FF) << 8) | (((x) & 0xFF00) >> 8)) + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) #else -#error "ascon byte order macros not defined in endian.h" +#error "Ascon byte order macros not defined in endian.h" #endif -#endif // ENDIAN_H_ - +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64/hash.c b/ascon/Implementations/crypto_hash/asconxofv12/opt64/hash.c index d74306e..34dec63 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/opt64/hash.c +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64/hash.c @@ -1,45 +1,51 @@ #include "api.h" -#include "endian.h" +#include "ascon.h" +#include "loadstore.h" #include "permutations.h" - -#define RATE (64 / 8) -#define PA_ROUNDS 12 - -int crypto_hash(unsigned char* out, const unsigned char* in, - unsigned long long inlen) { - state s; - u64 outlen; - u64 i; - - // initialization - s.x0 = 0xb57e273b814cd416ull; - s.x1 = 0x2b51042562ae2420ull; - s.x2 = 0x66a3a7768ddf2218ull; - s.x3 = 
0x5aad0a7a8153650cull; - s.x4 = 0x4f3e0e32539493b6ull; - - // absorb plaintext - while (inlen >= RATE) { - s.x0 ^= U64BIG(*(u64*)in); - P12(); - inlen -= RATE; - in += RATE; +#include "printstate.h" + +int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { + state_t s; + + /* initialization */ +#ifdef ASCON_HASH + s.x0 = ASCON_HASH_IV0; + s.x1 = ASCON_HASH_IV1; + s.x2 = ASCON_HASH_IV2; + s.x3 = ASCON_HASH_IV3; + s.x4 = ASCON_HASH_IV4; +#endif +#ifdef ASCON_XOF + s.x0 = ASCON_XOF_IV0; + s.x1 = ASCON_XOF_IV1; + s.x2 = ASCON_XOF_IV2; + s.x3 = ASCON_XOF_IV3; + s.x4 = ASCON_XOF_IV4; +#endif + printstate("initialization", &s); + + /* absorb plaintext */ + while (len >= ASCON_RATE) { + XOR(s.x0, LOAD64(in)); + P12(&s); + in += ASCON_RATE; + len -= ASCON_RATE; } - for (i = 0; i < inlen; ++i, ++in) s.x0 ^= INS_BYTE64(*in, i); - s.x0 ^= INS_BYTE64(0x80, inlen); - - P12(); - - // absorb plaintext - outlen = CRYPTO_BYTES; - while (outlen > RATE) { - *(u64*)out = U64BIG(s.x0); - P12(); - outlen -= RATE; - out += RATE; + if (len) XOR(s.x0, LOAD(in, len)); + XOR(s.x0, PAD(len)); + P12(&s); + printstate("absorb plaintext", &s); + + /* squeeze output */ + len = CRYPTO_BYTES; + while (len > ASCON_RATE) { + STORE64(out, s.x0); + P12(&s); + out += ASCON_RATE; + len -= ASCON_RATE; } - *(u64*)out = U64BIG(s.x0); + STORE64(out, s.x0); + printstate("squeeze output", &s); return 0; } - diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64/loadstore.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + 
+/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG +#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const 
uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64/permutations.c b/ascon/Implementations/crypto_hash/asconxofv12/opt64/permutations.c new file mode 100644 index 0000000..3aecaa6 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64/permutations.c @@ -0,0 +1,26 @@ +#include "permutations.h" + +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64/permutations.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64/permutations.h index 4af1e2c..6172dd5 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/opt64/permutations.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64/permutations.h @@ -1,102 +1,184 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -typedef unsigned char u8; -typedef unsigned long long u64; - -typedef struct { - u64 x0, x1, x2, x3, x4; -} state; - -#define EXT_BYTE64(x, n) ((u8)((u64)(x) >> (8 * (7 - (n))))) -#define INS_BYTE64(x, n) ((u64)(x) << (8 * (7 - (n)))) -#define 
ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) - -#define ROUND(C) \ - do { \ - state t; \ - s.x2 ^= C; \ - s.x0 ^= s.x4; \ - s.x4 ^= s.x3; \ - s.x2 ^= s.x1; \ - t.x0 = s.x0; \ - t.x4 = s.x4; \ - t.x3 = s.x3; \ - t.x1 = s.x1; \ - t.x2 = s.x2; \ - s.x0 = t.x0 ^ ((~t.x1) & t.x2); \ - s.x2 = t.x2 ^ ((~t.x3) & t.x4); \ - s.x4 = t.x4 ^ ((~t.x0) & t.x1); \ - s.x1 = t.x1 ^ ((~t.x2) & t.x3); \ - s.x3 = t.x3 ^ ((~t.x4) & t.x0); \ - s.x1 ^= s.x0; \ - t.x1 = s.x1; \ - s.x1 = ROTR64(s.x1, 39); \ - s.x3 ^= s.x2; \ - t.x2 = s.x2; \ - s.x2 = ROTR64(s.x2, 1); \ - t.x4 = s.x4; \ - t.x2 ^= s.x2; \ - s.x2 = ROTR64(s.x2, 6 - 1); \ - t.x3 = s.x3; \ - t.x1 ^= s.x1; \ - s.x3 = ROTR64(s.x3, 10); \ - s.x0 ^= s.x4; \ - s.x4 = ROTR64(s.x4, 7); \ - t.x3 ^= s.x3; \ - s.x2 ^= t.x2; \ - s.x1 = ROTR64(s.x1, 61 - 39); \ - t.x0 = s.x0; \ - s.x2 = ~s.x2; \ - s.x3 = ROTR64(s.x3, 17 - 10); \ - t.x4 ^= s.x4; \ - s.x4 = ROTR64(s.x4, 41 - 7); \ - s.x3 ^= t.x3; \ - s.x1 ^= t.x1; \ - s.x0 = ROTR64(s.x0, 19); \ - s.x4 ^= t.x4; \ - t.x0 ^= s.x0; \ - s.x0 = ROTR64(s.x0, 28 - 19); \ - s.x0 ^= t.x0; \ - } while (0) - -#define P12() \ - do { \ - ROUND(0xf0); \ - ROUND(0xe1); \ - ROUND(0xd2); \ - ROUND(0xc3); \ - ROUND(0xb4); \ - ROUND(0xa5); \ - ROUND(0x96); \ - ROUND(0x87); \ - ROUND(0x78); \ - ROUND(0x69); \ - ROUND(0x5a); \ - ROUND(0x4b); \ - } while (0) - -#define P8() \ - do { \ - ROUND(0xb4); \ - ROUND(0xa5); \ - ROUND(0x96); \ - ROUND(0x87); \ - ROUND(0x78); \ - ROUND(0x69); \ - ROUND(0x5a); \ - ROUND(0x4b); \ - } while (0) - -#define P6() \ - do { \ - ROUND(0x96); \ - ROUND(0x87); \ - ROUND(0x78); \ - ROUND(0x69); \ - ROUND(0x5a); \ - ROUND(0x4b); \ - } while (0) - -#endif // PERMUTATIONS_H_ +#include +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 
+#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define 
START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS + +__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64/printstate.h 
b/ascon/Implementations/crypto_hash/asconxofv12/opt64/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64/round.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64/round.h new file mode 100644 index 0000000..077cbfd --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64/round.h @@ -0,0 +1,69 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint64_t C) { + state_t t; + s->x2 ^= C; + s->x0 ^= s->x4; + s->x4 ^= s->x3; + s->x2 ^= s->x1; + t.x0 = s->x0; + t.x4 = s->x4; + t.x3 = s->x3; + t.x1 = s->x1; + t.x2 = s->x2; + s->x0 = t.x0 ^ (~t.x1 & t.x2); + s->x2 = t.x2 ^ (~t.x3 & t.x4); + s->x4 = t.x4 ^ (~t.x0 & t.x1); + s->x1 = t.x1 ^ (~t.x2 & t.x3); + s->x3 = t.x3 ^ (~t.x4 & t.x0); + s->x1 ^= s->x0; + t.x1 = s->x1; + s->x1 = ROR64(s->x1, 39); + s->x3 ^= s->x2; + t.x2 = s->x2; + s->x2 = ROR64(s->x2, 1); + t.x4 = s->x4; + t.x2 ^= s->x2; + s->x2 = ROR64(s->x2, 6 - 1); + t.x3 = 
s->x3; + t.x1 ^= s->x1; + s->x3 = ROR64(s->x3, 10); + s->x0 ^= s->x4; + s->x4 = ROR64(s->x4, 7); + t.x3 ^= s->x3; + s->x2 ^= t.x2; + s->x1 = ROR64(s->x1, 61 - 39); + t.x0 = s->x0; + s->x2 = ~s->x2; + s->x3 = ROR64(s->x3, 17 - 10); + t.x4 ^= s->x4; + s->x4 = ROR64(s->x4, 41 - 7); + s->x3 ^= t.x3; + s->x1 ^= t.x1; + s->x0 = ROR64(s->x0, 19); + s->x4 ^= t.x4; + t.x0 ^= s->x0; + s->x0 = ROR64(s->x0, 28 - 19); + s->x0 ^= t.x0; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64/word.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64/word.h new file mode 100644 index 0000000..5d601bb --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64/word.h @@ -0,0 +1,47 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef uint64_t word_t; + +#define WORD_T +#define UINT64_T + +#define U64TOWORD +#define WORDTOU64 + +#define XOR(a, b) \ + do { \ + (a) ^= (b); \ + } while (0) + +#define AND(a, b) \ + do { \ + (a) &= (b); \ + } while (0) + +__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + return lo2hi << 32 | hi2lo >> 32; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/api.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/api.h new file mode 100644 index 0000000..d72f706 --- /dev/null +++ 
b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/api.h @@ -0,0 +1,3 @@ +#define CRYPTO_BYTES 32 +#define ASCON_RATE 8 +#define ASCON_XOF diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/ascon.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/ascon.h new file mode 100644 index 0000000..aa685d3 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/ascon.h @@ -0,0 +1,24 @@ +#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#include "config.h" +#include "word.h" + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +#define ASCON_AD 0 +#define ASCON_ENC 1 +#define ASCON_DEC 2 + +void process_data(state_t* s, uint8_t* out, const uint8_t* in, uint64_t len, + uint8_t mode); + +void ascon_core(state_t* s, uint8_t* out, const uint8_t* in, uint64_t tlen, + const uint8_t* ad, uint64_t adlen, const uint8_t* npub, + const uint8_t* k, uint8_t mode); + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/config.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/config.h new file mode 100644 index 0000000..ec8bd6f --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/config.h @@ -0,0 +1,34 @@ +#ifndef CONFIG_H_ +#define CONFIG_H_ + +/* inline the Ascon mode */ +#ifndef ASCON_INLINE_MODE +#define ASCON_INLINE_MODE 1 +#endif + +/* inline the Ascon permutations */ +#ifndef ASCON_INLINE_PERM +#define ASCON_INLINE_PERM 0 +#endif + +/* single function for all permutations */ +#ifndef ASCON_SINGLE_PERM +#define ASCON_SINGLE_PERM 1 +#endif + +/* unroll the permutation loops */ +#ifndef ASCON_UNROLL_LOOPS +#define ASCON_UNROLL_LOOPS 1 +#endif + +/* Ascon data access option { 'W'ordwise, 'B'ytewise, 'H'ybrid, 'M'emcpy } */ +#ifndef ASCON_DATA_ACCESS +#define ASCON_DATA_ACCESS 'H' +#endif + +/* make sure __forceinline is supported */ +#ifndef __forceinline +#define __forceinline inline __attribute__((always_inline)) +#endif + 
+#endif /* CONFIG_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/endian.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/endian.h new file mode 100644 index 0000000..3944360 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/endian.h @@ -0,0 +1,39 @@ +#ifndef ENDIAN_H_ +#define ENDIAN_H_ + +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* macros for big endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for big endian machines") +#endif +#define U64BIG(x) (x) +#define U32BIG(x) (x) +#define U16BIG(x) (x) + +#elif defined(_MSC_VER) || \ + (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + +/* macros for little endian machines */ +#ifndef NDEBUG +#pragma message("Using macros for little endian machines") +#endif +#define U64BIG(x) \ + (((0x00000000000000FFULL & (x)) << 56) | \ + ((0x000000000000FF00ULL & (x)) << 40) | \ + ((0x0000000000FF0000ULL & (x)) << 24) | \ + ((0x00000000FF000000ULL & (x)) << 8) | \ + ((0x000000FF00000000ULL & (x)) >> 8) | \ + ((0x0000FF0000000000ULL & (x)) >> 24) | \ + ((0x00FF000000000000ULL & (x)) >> 40) | \ + ((0xFF00000000000000ULL & (x)) >> 56)) +#define U32BIG(x) \ + (((0x000000FF & (x)) << 24) | ((0x0000FF00 & (x)) << 8) | \ + ((0x00FF0000 & (x)) >> 8) | ((0xFF000000 & (x)) >> 24)) +#define U16BIG(x) (((0x00FF & (x)) << 8) | ((0xFF00 & (x)) >> 8)) + +#else +#error "Ascon byte order macros not defined in endian.h" +#endif + +#endif /* ENDIAN_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/hash.c b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/hash.c new file mode 100644 index 0000000..34dec63 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/hash.c @@ -0,0 +1,51 @@ +#include "api.h" +#include "ascon.h" +#include "loadstore.h" +#include "permutations.h" +#include "printstate.h" + +int crypto_hash(uint8_t* out, const uint8_t* 
in, uint64_t len) { + state_t s; + + /* initialization */ +#ifdef ASCON_HASH + s.x0 = ASCON_HASH_IV0; + s.x1 = ASCON_HASH_IV1; + s.x2 = ASCON_HASH_IV2; + s.x3 = ASCON_HASH_IV3; + s.x4 = ASCON_HASH_IV4; +#endif +#ifdef ASCON_XOF + s.x0 = ASCON_XOF_IV0; + s.x1 = ASCON_XOF_IV1; + s.x2 = ASCON_XOF_IV2; + s.x3 = ASCON_XOF_IV3; + s.x4 = ASCON_XOF_IV4; +#endif + printstate("initialization", &s); + + /* absorb plaintext */ + while (len >= ASCON_RATE) { + XOR(s.x0, LOAD64(in)); + P12(&s); + in += ASCON_RATE; + len -= ASCON_RATE; + } + if (len) XOR(s.x0, LOAD(in, len)); + XOR(s.x0, PAD(len)); + P12(&s); + printstate("absorb plaintext", &s); + + /* squeeze output */ + len = CRYPTO_BYTES; + while (len > ASCON_RATE) { + STORE64(out, s.x0); + P12(&s); + out += ASCON_RATE; + len -= ASCON_RATE; + } + STORE64(out, s.x0); + printstate("squeeze output", &s); + + return 0; +} diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/implementors b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/implementors new file mode 100644 index 0000000..b110c1a --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/implementors @@ -0,0 +1,2 @@ +Christoph Dobraunig +Martin Schläffer diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/loadstore.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/loadstore.h new file mode 100644 index 0000000..6cb0cca --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/loadstore.h @@ -0,0 +1,118 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +#include "config.h" +#include "endian.h" +#include "word.h" + +/* 64-bit LSB mask (undefined for n == 0) */ +#define MASK(n) (~0ull >> (64 - (n))) + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +#if ASCON_DATA_ACCESS == 'W' + +#ifndef NDEBUG 
+#pragma message("Using wordwise data access") +#endif + +__forceinline word_t LOAD64(const uint8_t* bytes) { + uint64_t x = U64BIG(*(uint64_t*)bytes); + return U64TOWORD(x); +} + +__forceinline void STORE64(uint8_t* bytes, word_t w) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes = U64BIG(x); +} + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = U64BIG(*(uint64_t*)bytes & MASK(8 * n)); + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + *(uint64_t*)bytes &= ~MASK(8 * n); + *(uint64_t*)bytes |= U64BIG(x); +} + +#elif ASCON_DATA_ACCESS == 'M' + +#ifndef NDEBUG +#pragma message("Using memcpy to access data") +#endif + +#include + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + memcpy((uint8_t*)&x, bytes, n); + return U64TOWORD(U64BIG(x)); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = U64BIG(WORDTOU64(w)); + memcpy(bytes, (uint8_t*)&x, n); +} + +#elif ASCON_DATA_ACCESS == 'B' + +#ifndef NDEBUG +#pragma message("Using bytewise data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= (uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#elif ASCON_DATA_ACCESS == 'H' + +#ifndef NDEBUG +#pragma message("Using hybrid data access") +#endif + +#define LOAD64(bytes) LOAD(bytes, 8) +#define STORE64(bytes, w) STORE(bytes, w, 8) + +__forceinline word_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + if (n == 8) + x = U64BIG(*(uint64_t*)bytes); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) x |= 
(uint64_t)*bytes++ << i; + return U64TOWORD(x); +} + +__forceinline void STORE(uint8_t* bytes, word_t w, int n) { + uint64_t x = WORDTOU64(w); + if (n == 8) + *(uint64_t*)bytes = U64BIG(x); + else + for (int i = 56; i >= 64 - n * 8; i -= 8) *bytes++ = x >> i; +} + +#else +#error "Ascon data access macro not defined correctly" +#endif + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/permutations.c b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/permutations.c new file mode 100644 index 0000000..3aecaa6 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/permutations.c @@ -0,0 +1,26 @@ +#include "permutations.h" + +#include "round.h" + +#if ASCON_INLINE_PERM + +#elif ASCON_SINGLE_PERM + +void P(state_t* s, uint8_t rounds) { + printstate(" permutation input", s); + for (int i = START(rounds); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s) { P12ROUNDS(s); } + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 16 +void P8(state_t* s) { P8ROUNDS(s); } +#endif + +#if defined(CRYPTO_ABYTES) && ASCON_RATE == 8 +void P6(state_t* s) { P6ROUNDS(s); } +#endif + +#endif diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/permutations.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/permutations.h new file mode 100644 index 0000000..6172dd5 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/permutations.h @@ -0,0 +1,184 @@ +#ifndef PERMUTATIONS_H_ +#define PERMUTATIONS_H_ + +#include + +#include "api.h" +#include "ascon.h" +#include "printstate.h" +#include "round.h" + +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 + +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 + +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 + +#define ASCON_HASH_BYTES 32 + +#define 
ASCON_128_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_128A_IV \ + U64TOWORD(((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) + +#define ASCON_80PQ_IV \ + U64TOWORD(((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) + +#define ASCON_HASH_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) + +#define ASCON_XOF_IV \ + U64TOWORD(((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +#define ASCON_HASH_IV0 U64TOWORD(0xee9398aadb67f03dull) +#define ASCON_HASH_IV1 U64TOWORD(0x8bb21831c60f1002ull) +#define ASCON_HASH_IV2 U64TOWORD(0xb48a92db98d5da62ull) +#define ASCON_HASH_IV3 U64TOWORD(0x43189921b8f8e3e8ull) +#define ASCON_HASH_IV4 U64TOWORD(0x348fa5c9d525e140ull) + +#define ASCON_XOF_IV0 U64TOWORD(0xb57e273b814cd416ull) +#define ASCON_XOF_IV1 U64TOWORD(0x2b51042562ae2420ull) +#define ASCON_XOF_IV2 U64TOWORD(0x66a3a7768ddf2218ull) +#define ASCON_XOF_IV3 U64TOWORD(0x5aad0a7a8153650cull) +#define ASCON_XOF_IV4 U64TOWORD(0x4f3e0e32539493b6ull) + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 16 +#define IV ASCON_128_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#if ASCON_RATE == 16 +#define IV ASCON_128A_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 8 +#define PB P8 +#endif + +#if ASCON_RATE == 8 && CRYPTO_KEYBYTES == 20 +#define IV ASCON_80PQ_IV +#define PA_ROUNDS 12 +#define PB_ROUNDS 6 +#define PB P6 +#endif + +#define START(n) ((3 + (n)) << 4 | (12 - (n))) + +#if ASCON_UNROLL_LOOPS + 
+__forceinline void P12ROUNDS(state_t* s) { + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P8ROUNDS(state_t* s) { + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +__forceinline void P6ROUNDS(state_t* s) { + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} + +#else /* !ASCON_UNROLL_LOOPS */ + +__forceinline void P12ROUNDS(state_t* s) { + for (int i = START(12); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P8ROUNDS(state_t* s) { + for (int i = START(8); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +__forceinline void P6ROUNDS(state_t* s) { + for (int i = START(6); i > 0x4a; i -= 0x0f) ROUND(s, i); +} + +#endif + +#if ASCON_INLINE_PERM + +__forceinline void P12(state_t* s) { + printstate(" permutation input", s); + P12ROUNDS(s); +} + +__forceinline void P8(state_t* s) { + printstate(" permutation input", s); + P8ROUNDS(s); +} + +__forceinline void P6(state_t* s) { + printstate(" permutation input", s); + P6ROUNDS(s); +} + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#elif ASCON_SINGLE_PERM + +#define P12(s) P(s, 12) +#define P8(s) P(s, 8) +#define P6(s) P(s, 6) + +void P(state_t* s, uint8_t rounds); + +#else /* !ASCON_INLINE_PERM && !ASCON_SINGLE_PERM */ + +void P12(state_t* s); +void P8(state_t* s); +void P6(state_t* s); + +__forceinline void P(state_t* s, int i) { + if (i == 12) P12(s); + if (i == 8) P8(s); + if (i == 6) P6(s); +} + +#endif + +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/printstate.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/printstate.h new file 
mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/round.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/round.h new file mode 100644 index 0000000..077cbfd --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/round.h @@ -0,0 +1,69 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +__forceinline void KINIT(word_t* K0, word_t* K1, word_t* K2) { + *K0 = WORD_T(0); + *K1 = WORD_T(0); + *K2 = WORD_T(0); +} + +__forceinline void PINIT(state_t* s) { + s->x0 = WORD_T(0); + s->x1 = WORD_T(0); + s->x2 = WORD_T(0); + s->x3 = WORD_T(0); + s->x4 = WORD_T(0); +} + +__forceinline void ROUND(state_t* s, uint64_t C) { + state_t t; + s->x2 ^= C; + s->x0 ^= s->x4; + s->x4 ^= s->x3; + s->x2 ^= s->x1; + t.x0 = s->x0; + t.x4 = s->x4; + t.x3 = s->x3; + t.x1 = s->x1; + t.x2 = s->x2; + s->x0 = t.x0 ^ (~t.x1 & t.x2); + s->x2 = t.x2 ^ (~t.x3 & t.x4); + s->x4 = t.x4 ^ (~t.x0 & t.x1); + s->x1 = t.x1 ^ (~t.x2 & t.x3); + s->x3 = t.x3 ^ (~t.x4 & t.x0); + s->x1 ^= s->x0; + t.x1 = s->x1; + s->x1 = ROR64(s->x1, 39); + s->x3 ^= s->x2; + t.x2 = s->x2; + s->x2 = ROR64(s->x2, 1); + t.x4 = s->x4; + t.x2 ^= s->x2; + s->x2 = ROR64(s->x2, 6 - 1); + t.x3 = s->x3; + t.x1 ^= s->x1; + s->x3 = ROR64(s->x3, 
10); + s->x0 ^= s->x4; + s->x4 = ROR64(s->x4, 7); + t.x3 ^= s->x3; + s->x2 ^= t.x2; + s->x1 = ROR64(s->x1, 61 - 39); + t.x0 = s->x0; + s->x2 = ~s->x2; + s->x3 = ROR64(s->x3, 17 - 10); + t.x4 ^= s->x4; + s->x4 = ROR64(s->x4, 41 - 7); + s->x3 ^= t.x3; + s->x1 ^= t.x1; + s->x0 = ROR64(s->x0, 19); + s->x4 ^= t.x4; + t.x0 ^= s->x0; + s->x0 = ROR64(s->x0, 28 - 19); + s->x0 ^= t.x0; + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/word.h b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/word.h new file mode 100644 index 0000000..5d601bb --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/opt64_lowsize/word.h @@ -0,0 +1,47 @@ +#ifndef WORD_H_ +#define WORD_H_ + +#include + +#include "config.h" + +typedef uint64_t word_t; + +#define WORD_T +#define UINT64_T + +#define U64TOWORD +#define WORDTOU64 + +#define XOR(a, b) \ + do { \ + (a) ^= (b); \ + } while (0) + +#define AND(a, b) \ + do { \ + (a) &= (b); \ + } while (0) + +__forceinline word_t ROR64(word_t x, int n) { return x >> n | x << (64 - n); } + +__forceinline word_t KEYROT(word_t lo2hi, word_t hi2lo) { + return lo2hi << 32 | hi2lo >> 32; +} + +__forceinline int NOTZERO(word_t a, word_t b) { + int result = 0; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&a)[i]; + for (int i = 0; i < 8; ++i) result |= ((uint8_t*)&b)[i]; + return result; +} + +/* set padding byte in 64-bit Ascon word */ +__forceinline word_t PAD(int i) { return WORD_T(0x80ull << (56 - 8 * i)); } + +/* byte mask for 64-bit Ascon word (1 <= n <= 8) */ +__forceinline word_t XMASK(int n) { + return WORD_T(0x00ffffffffffffffull >> (n * 8 - 8)); +} + +#endif /* WORD_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/ref/ascon.h b/ascon/Implementations/crypto_hash/asconxofv12/ref/ascon.h new file mode 100644 index 0000000..8ab0502 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/ref/ascon.h @@ -0,0 +1,14 @@ 
+#ifndef ASCON_H_ +#define ASCON_H_ + +#include + +#define WORDTOU64 + +typedef uint64_t word_t; + +typedef struct { + word_t x0, x1, x2, x3, x4; +} state_t; + +#endif // ASCON_H_ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/ref/hash.c b/ascon/Implementations/crypto_hash/asconxofv12/ref/hash.c index 3282fc5..fe008b7 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/ref/hash.c +++ b/ascon/Implementations/crypto_hash/asconxofv12/ref/hash.c @@ -1,50 +1,43 @@ #include "api.h" +#include "ascon.h" +#include "loadstore.h" #include "permutations.h" +#include "printstate.h" -#define RATE (64 / 8) -#define PA_ROUNDS 12 -#define IV ((u64)(8 * (RATE)) << 48 | (u64)(PA_ROUNDS) << 40) +int crypto_hash(uint8_t* out, const uint8_t* in, uint64_t len) { + state_t s; -int crypto_hash(unsigned char *out, const unsigned char *in, - unsigned long long inlen) { - state s; - u64 outlen; - - // initialization - s.x0 = IV; + /* initialization */ + s.x0 = ASCON_XOF_IV; s.x1 = 0; s.x2 = 0; s.x3 = 0; s.x4 = 0; - printstate("initial value:", s); P12(&s); - printstate("initialization:", s); + printstate("initialization", &s); - // absorb plaintext - inlen = inlen; - while (inlen >= RATE) { - s.x0 ^= BYTES_TO_U64(in, 8); + /* absorb plaintext */ + while (len >= ASCON_128_RATE) { + s.x0 ^= LOAD(in, 8); P12(&s); - inlen -= RATE; - in += RATE; + in += ASCON_128_RATE; + len -= ASCON_128_RATE; } - s.x0 ^= BYTES_TO_U64(in, inlen); - s.x0 ^= 0x80ull << (56 - 8 * inlen); - printstate("absorb plaintext:", s); - + s.x0 ^= LOAD(in, len); + s.x0 ^= PAD(len); P12(&s); - printstate("finalization:", s); + printstate("absorb plaintext", &s); - // set hash output - outlen = CRYPTO_BYTES; - while (outlen > RATE) { - U64_TO_BYTES(out, s.x0, 8); + /* squeeze output */ + len = CRYPTO_BYTES; + while (len > ASCON_128_RATE) { + STORE(out, s.x0, 8); P12(&s); - outlen -= RATE; - out += RATE; + out += ASCON_128_RATE; + len -= ASCON_128_RATE; } - U64_TO_BYTES(out, s.x0, 8); + STORE(out, s.x0, 8); + 
printstate("squeeze output", &s); return 0; } - diff --git a/ascon/Implementations/crypto_hash/asconxofv12/ref/loadstore.h b/ascon/Implementations/crypto_hash/asconxofv12/ref/loadstore.h new file mode 100644 index 0000000..eccf0e3 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/ref/loadstore.h @@ -0,0 +1,31 @@ +#ifndef LOADSTORE_H_ +#define LOADSTORE_H_ + +#include + +/* get byte from Ascon 64-bit word */ +#define GETBYTE(x, i) ((uint8_t)((uint64_t)(x) >> (56 - 8 * (i)))) + +/* set byte in Ascon 64-bit word */ +#define SETBYTE(b, i) ((uint64_t)(b) << (56 - 8 * (i))) + +/* set padding byte in Ascon 64-bit word */ +#define PAD(i) SETBYTE(0x80, i) + +static inline uint64_t MASK(int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) x |= SETBYTE(0xff, i); + return x; +} + +static inline uint64_t LOAD(const uint8_t* bytes, int n) { + uint64_t x = 0; + for (int i = 0; i < n; ++i) x |= SETBYTE(bytes[i], i); + return x; +} + +static inline void STORE(uint8_t* bytes, uint64_t x, int n) { + for (int i = 0; i < n; ++i) bytes[i] = GETBYTE(x, i); +} + +#endif /* LOADSTORE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/ref/permutations.h b/ascon/Implementations/crypto_hash/asconxofv12/ref/permutations.h index 3317a9f..ef6dd68 100644 --- a/ascon/Implementations/crypto_hash/asconxofv12/ref/permutations.h +++ b/ascon/Implementations/crypto_hash/asconxofv12/ref/permutations.h @@ -1,132 +1,88 @@ #ifndef PERMUTATIONS_H_ #define PERMUTATIONS_H_ -#ifdef DEBUG -#include -#endif +#include -typedef unsigned char u8; -typedef unsigned long long u64; +#include "ascon.h" +#include "printstate.h" +#include "round.h" -typedef struct { - u64 x0, x1, x2, x3, x4; -} state; +#define ASCON_128_KEYBYTES 16 +#define ASCON_128A_KEYBYTES 16 +#define ASCON_80PQ_KEYBYTES 20 -static inline void printstate(const char* text, const state s) { -#ifdef DEBUG - printf("%s\n", text); - printf(" x0=%016llx\n", s.x0); - printf(" x1=%016llx\n", s.x1); - printf(" x2=%016llx\n", 
s.x2); - printf(" x3=%016llx\n", s.x3); - printf(" x4=%016llx\n", s.x4); -#else - // disable warning about unused parameters - (void)text; - (void)s; -#endif -} +#define ASCON_128_RATE 8 +#define ASCON_128A_RATE 16 -static inline u64 BYTES_TO_U64(const u8* bytes, int n) { - int i; - u64 x = 0; - for (i = 0; i < n; i++) x |= ((u64)bytes[i]) << (56 - 8 * i); - return x; -} +#define ASCON_128_PA_ROUNDS 12 +#define ASCON_128_PB_ROUNDS 6 +#define ASCON_128A_PB_ROUNDS 8 -static inline void U64_TO_BYTES(u8* bytes, const u64 x, int n) { - int i; - for (i = 0; i < n; i++) bytes[i] = (u8)(x >> (56 - 8 * i)); -} +#define ASCON_HASH_BYTES 32 -static inline u64 BYTE_MASK(int n) { - int i; - u64 x = 0; - for (i = 0; i < n; i++) x |= 0xffull << (56 - 8 * i); - return x; -} +#define ASCON_128_IV \ + (((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) -static inline u64 ROTR64(u64 x, int n) { return (x << (64 - n)) | (x >> n); } +#define ASCON_128A_IV \ + (((uint64_t)(ASCON_128_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128A_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128A_PB_ROUNDS) << 32)) -static inline void ROUND(u8 C, state* p) { - state s = *p; - state t; - // addition of round constant - s.x2 ^= C; - printstate(" addition of round constant:", s); - // substitution layer - s.x0 ^= s.x4; - s.x4 ^= s.x3; - s.x2 ^= s.x1; - // start of keccak s-box - t.x0 = ~s.x0; - t.x1 = ~s.x1; - t.x2 = ~s.x2; - t.x3 = ~s.x3; - t.x4 = ~s.x4; - t.x0 &= s.x1; - t.x1 &= s.x2; - t.x2 &= s.x3; - t.x3 &= s.x4; - t.x4 &= s.x0; - s.x0 ^= t.x1; - s.x1 ^= t.x2; - s.x2 ^= t.x3; - s.x3 ^= t.x4; - s.x4 ^= t.x0; - // end of keccak s-box - s.x1 ^= s.x0; - s.x0 ^= s.x4; - s.x3 ^= s.x2; - s.x2 = ~s.x2; - printstate(" substitution layer:", s); - // linear diffusion layer - s.x0 ^= ROTR64(s.x0, 19) ^ ROTR64(s.x0, 28); - s.x1 ^= ROTR64(s.x1, 
61) ^ ROTR64(s.x1, 39); - s.x2 ^= ROTR64(s.x2, 1) ^ ROTR64(s.x2, 6); - s.x3 ^= ROTR64(s.x3, 10) ^ ROTR64(s.x3, 17); - s.x4 ^= ROTR64(s.x4, 7) ^ ROTR64(s.x4, 41); - printstate(" linear diffusion layer:", s); - *p = s; -} +#define ASCON_80PQ_IV \ + (((uint64_t)(ASCON_80PQ_KEYBYTES * 8) << 56) | \ + ((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_128_PB_ROUNDS) << 32)) -static inline void P12(state* s) { - printstate(" permutation input:", *s); - ROUND(0xf0, s); - ROUND(0xe1, s); - ROUND(0xd2, s); - ROUND(0xc3, s); - ROUND(0xb4, s); - ROUND(0xa5, s); - ROUND(0x96, s); - ROUND(0x87, s); - ROUND(0x78, s); - ROUND(0x69, s); - ROUND(0x5a, s); - ROUND(0x4b, s); -} +#define ASCON_HASH_IV \ + (((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40) | \ + ((uint64_t)(ASCON_HASH_BYTES * 8) << 0)) -static inline void P8(state* s) { - printstate(" permutation input:", *s); - ROUND(0xb4, s); - ROUND(0xa5, s); - ROUND(0x96, s); - ROUND(0x87, s); - ROUND(0x78, s); - ROUND(0x69, s); - ROUND(0x5a, s); - ROUND(0x4b, s); +#define ASCON_XOF_IV \ + (((uint64_t)(ASCON_128_RATE * 8) << 48) | \ + ((uint64_t)(ASCON_128_PA_ROUNDS) << 40)) + +static inline void P12(state_t* s) { + printstate(" permutation input", s); + ROUND(s, 0xf0); + ROUND(s, 0xe1); + ROUND(s, 0xd2); + ROUND(s, 0xc3); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); } -static inline void P6(state* s) { - printstate(" permutation input:", *s); - ROUND(0x96, s); - ROUND(0x87, s); - ROUND(0x78, s); - ROUND(0x69, s); - ROUND(0x5a, s); - ROUND(0x4b, s); +static inline void P8(state_t* s) { + printstate(" permutation input", s); + ROUND(s, 0xb4); + ROUND(s, 0xa5); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); } -#endif // PERMUTATIONS_H_ +static inline void P6(state_t* s) { + 
printstate(" permutation input", s); + ROUND(s, 0x96); + ROUND(s, 0x87); + ROUND(s, 0x78); + ROUND(s, 0x69); + ROUND(s, 0x5a); + ROUND(s, 0x4b); +} +#endif /* PERMUTATIONS_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/ref/printstate.h b/ascon/Implementations/crypto_hash/asconxofv12/ref/printstate.h new file mode 100644 index 0000000..34bd476 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/ref/printstate.h @@ -0,0 +1,31 @@ +#ifndef PRINTSTATE_H_ +#define PRINTSTATE_H_ + +#ifdef NDEBUG + +#define printword(text, w) +#define printstate(text, s) + +#else + +#include +#include + +#include "ascon.h" + +__forceinline void printword(const char* text, const word_t x) { + printf("%s=%016" PRIx64 "\n", text, WORDTOU64(x)); +} + +__forceinline void printstate(const char* text, const state_t* s) { + printf("%s:\n", text); + printword(" x0", s->x0); + printword(" x1", s->x1); + printword(" x2", s->x2); + printword(" x3", s->x3); + printword(" x4", s->x4); +} + +#endif + +#endif /* PRINTSTATE_H_ */ diff --git a/ascon/Implementations/crypto_hash/asconxofv12/ref/round.h b/ascon/Implementations/crypto_hash/asconxofv12/ref/round.h new file mode 100644 index 0000000..dcb6c81 --- /dev/null +++ b/ascon/Implementations/crypto_hash/asconxofv12/ref/round.h @@ -0,0 +1,40 @@ +#ifndef ROUND_H_ +#define ROUND_H_ + +#include "ascon.h" +#include "printstate.h" + +static inline uint64_t ROR64(uint64_t x, int n) { + return (x << (64 - n)) | (x >> n); +} + +static inline void ROUND(state_t* s, uint8_t C) { + state_t t; + /* addition of round constant */ + s->x2 ^= C; + /* substitution layer */ + s->x0 ^= s->x4; + s->x4 ^= s->x3; + s->x2 ^= s->x1; + /* start of keccak s-box */ + t.x0 = s->x0 ^ (~s->x1 & s->x2); + t.x1 = s->x1 ^ (~s->x2 & s->x3); + t.x2 = s->x2 ^ (~s->x3 & s->x4); + t.x3 = s->x3 ^ (~s->x4 & s->x0); + t.x4 = s->x4 ^ (~s->x0 & s->x1); + /* end of keccak s-box */ + t.x1 ^= t.x0; + t.x0 ^= t.x4; + t.x3 ^= t.x2; + t.x2 = ~t.x2; + /* printstate(" 
substitution layer", &t); */ + /* linear diffusion layer */ + s->x0 = t.x0 ^ ROR64(t.x0, 19) ^ ROR64(t.x0, 28); + s->x1 = t.x1 ^ ROR64(t.x1, 61) ^ ROR64(t.x1, 39); + s->x2 = t.x2 ^ ROR64(t.x2, 1) ^ ROR64(t.x2, 6); + s->x3 = t.x3 ^ ROR64(t.x3, 10) ^ ROR64(t.x3, 17); + s->x4 = t.x4 ^ ROR64(t.x4, 7) ^ ROR64(t.x4, 41); + printstate(" round output", s); +} + +#endif /* ROUND_H_ */ diff --git a/ascon/Implementations/genkat.cmake b/ascon/Implementations/genkat.cmake new file mode 100644 index 0000000..753b49e --- /dev/null +++ b/ascon/Implementations/genkat.cmake @@ -0,0 +1,30 @@ +# setup KAT file name +if(${ALG} STREQUAL ascon128v12 OR ${ALG} STREQUAL ascon128av12) + set(KAT_PATH crypto_aead/${ALG}) + set(KAT_FILE LWC_AEAD_KAT_128_128.txt) +elseif(${ALG} STREQUAL ascon80pqv12) + set(KAT_PATH crypto_aead/${ALG}) + set(KAT_FILE LWC_AEAD_KAT_160_128.txt) +elseif(${ALG} STREQUAL asconhashv12 OR ${ALG} STREQUAL asconxofv12) + set(KAT_PATH crypto_hash/${ALG}) + set(KAT_FILE LWC_HASH_KAT_256.txt) +else() + message(FATAL_ERROR "KAT file name not defined for algorithm ${ALG}.") +endif() + +# remove previous and generate new KAT file +file(REMOVE ${BIN_DIR}/${KAT_FILE}) +if(EXISTS ${BIN_DIR}/${CONFIG}) + execute_process(COMMAND ${BIN_DIR}/${CONFIG}/${EXE_NAME}) +else() + execute_process(COMMAND ${BIN_DIR}/${EXE_NAME}) +endif() +configure_file(${BIN_DIR}/${KAT_FILE} ${BIN_DIR}/${KAT_FILE} NEWLINE_STYLE LF) + +# compare KAT files +execute_process(COMMAND ${CMAKE_COMMAND} -E compare_files + ${BIN_DIR}/${KAT_FILE} ${SRC_DIR}/${KAT_PATH}/${KAT_FILE} + RESULT_VARIABLE COMPARE_RESULT) +if(${COMPARE_RESULT}) + message(FATAL_ERROR "KAT files are not identical.") +endif() diff --git a/ascon/Implementations/tests/crypto_aead.h b/ascon/Implementations/tests/crypto_aead.h new file mode 100644 index 0000000..2119678 --- /dev/null +++ b/ascon/Implementations/tests/crypto_aead.h @@ -0,0 +1,11 @@ +int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen, + const unsigned char *m, 
unsigned long long mlen, + const unsigned char *ad, unsigned long long adlen, + const unsigned char *nsec, const unsigned char *npub, + const unsigned char *k); + +int crypto_aead_decrypt(unsigned char *m, unsigned long long *mlen, + unsigned char *nsec, const unsigned char *c, + unsigned long long clen, const unsigned char *ad, + unsigned long long adlen, const unsigned char *npub, + const unsigned char *k); diff --git a/ascon/Implementations/tests/crypto_hash.h b/ascon/Implementations/tests/crypto_hash.h new file mode 100644 index 0000000..6de17f8 --- /dev/null +++ b/ascon/Implementations/tests/crypto_hash.h @@ -0,0 +1,2 @@ +int crypto_hash(unsigned char *out, const unsigned char *in, + unsigned long long inlen); diff --git a/ascon/Implementations/tests/demo.c b/ascon/Implementations/tests/demo.c new file mode 100644 index 0000000..a52ea01 --- /dev/null +++ b/ascon/Implementations/tests/demo.c @@ -0,0 +1,62 @@ +#include +#include + +#include "api.h" +#if defined(CRYPTO_AEAD) +#include "crypto_aead.h" +#elif defined(CRYPTO_HASH) +#include "crypto_hash.h" +#endif + +void print(unsigned char c, unsigned char* x, unsigned long long xlen) { + unsigned long long i; + printf("%c[%d]=", c, (int)xlen); + for (i = 0; i < xlen; ++i) printf("%02x", x[i]); + printf("\n"); +} + +int main() { + int result = 0; +#if defined(CRYPTO_AEAD) + unsigned long long alen = 0; + unsigned long long mlen = 0; + unsigned long long clen = CRYPTO_ABYTES; + unsigned char a[] = "ASCON"; + unsigned char m[] = "ascon"; + unsigned char c[strlen((const char*)m) + CRYPTO_ABYTES]; + unsigned char nsec[CRYPTO_NSECBYTES]; + unsigned char npub[CRYPTO_NPUBBYTES] = {0}; + unsigned char k[CRYPTO_KEYBYTES] = {0}; + alen = strlen((const char*)a); + mlen = strlen((const char*)m); + print('k', k, CRYPTO_KEYBYTES); + printf(" "); + print('n', npub, CRYPTO_NPUBBYTES); + printf("\n"); + print('a', a, alen); + printf(" "); + print('m', m, mlen); + printf(" -> "); + result |= crypto_aead_encrypt(c, &clen, m, mlen, 
a, alen, nsec, npub, k); + print('c', c, clen - CRYPTO_ABYTES); + printf(" "); + print('t', c + clen - CRYPTO_ABYTES, CRYPTO_ABYTES); + printf(" -> "); + result |= crypto_aead_decrypt(m, &mlen, nsec, c, clen, a, alen, npub, k); + print('a', a, alen); + printf(" "); + print('m', m, mlen); + printf("\n"); +#elif defined(CRYPTO_HASH) + unsigned long long mlen = 0; + unsigned char m[] = "ascon"; + unsigned char h[CRYPTO_BYTES] = {0}; + mlen = strlen((const char*)m); + print('m', m, mlen); + printf(" -> "); + result |= crypto_hash(h, m, mlen); + print('h', h, CRYPTO_BYTES); + printf("\n"); +#endif + return result; +} diff --git a/ascon/Implementations/tests/genkat_aead.c b/ascon/Implementations/tests/genkat_aead.c new file mode 100644 index 0000000..f03aca9 --- /dev/null +++ b/ascon/Implementations/tests/genkat_aead.c @@ -0,0 +1,162 @@ +// +// NIST-developed software is provided by NIST as a public service. +// You may use, copy and distribute copies of the software in any medium, +// provided that you keep intact this entire notice. You may improve, +// modify and create derivative works of the software or any portion of +// the software, and you may copy and distribute such modifications or +// works. Modified works should carry a notice stating that you changed +// the software and should note the date and nature of any such change. +// Please explicitly acknowledge the National Institute of Standards and +// Technology as the source of the software. +// +// NIST-developed software is expressly provided "AS IS." NIST MAKES NO +// WARRANTY OF ANY KIND, EXPRESS, IMPLIED, IN FACT OR ARISING BY OPERATION +// OF LAW, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT AND DATA +// ACCURACY. NIST NEITHER REPRESENTS NOR WARRANTS THAT THE OPERATION OF THE +// SOFTWARE WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ANY DEFECTS WILL BE +// CORRECTED. 
NIST DOES NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE +// USE OF THE SOFTWARE OR THE RESULTS THEREOF, INCLUDING BUT NOT LIMITED TO THE +// CORRECTNESS, ACCURACY, RELIABILITY, OR USEFULNESS OF THE SOFTWARE. +// +// You are solely responsible for determining the appropriateness of using and +// distributing the software and you assume all risks associated with its use, +// including but not limited to the risks and costs of program errors, +// compliance with applicable laws, damage to or loss of data, programs or +// equipment, and the unavailability or interruption of operation. This software +// is not intended to be used in any situation where a failure could cause risk +// of injury or damage to property. The software developed by NIST employees is +// not subject to copyright protection within the United States. +// + +// disable deprecation for sprintf and fopen +#ifdef _MSC_VER +#define _CRT_SECURE_NO_WARNINGS +#endif + +#include +#include +#include + +#include "api.h" +#include "crypto_aead.h" + +#define KAT_SUCCESS 0 +#define KAT_FILE_OPEN_ERROR -1 +#define KAT_DATA_ERROR -3 +#define KAT_CRYPTO_FAILURE -4 + +#define MAX_FILE_NAME 256 +#define MAX_MESSAGE_LENGTH 32 +#define MAX_ASSOCIATED_DATA_LENGTH 32 + +void init_buffer(unsigned char *buffer, unsigned long long numbytes); + +void fprint_bstr(FILE *fp, const char *label, const unsigned char *data, + unsigned long long length); + +int generate_test_vectors(); + +int main() { + int ret = generate_test_vectors(); + + if (ret != KAT_SUCCESS) { + fprintf(stderr, "test vector generation failed with code %d\n", ret); + } + + return ret; +} + +int generate_test_vectors() { + FILE *fp; + char fileName[MAX_FILE_NAME]; + unsigned char key[CRYPTO_KEYBYTES]; + unsigned char nonce[CRYPTO_NPUBBYTES]; + unsigned char msg[MAX_MESSAGE_LENGTH]; + unsigned char msg2[MAX_MESSAGE_LENGTH]; + unsigned char ad[MAX_ASSOCIATED_DATA_LENGTH]; + unsigned char ct[MAX_MESSAGE_LENGTH + CRYPTO_ABYTES]; + unsigned long long clen, 
mlen2; + int count = 1; + int func_ret, ret_val = KAT_SUCCESS; + + init_buffer(key, sizeof(key)); + init_buffer(nonce, sizeof(nonce)); + init_buffer(msg, sizeof(msg)); + init_buffer(ad, sizeof(ad)); + + sprintf(fileName, "LWC_AEAD_KAT_%d_%d.txt", (CRYPTO_KEYBYTES * 8), + (CRYPTO_NPUBBYTES * 8)); + + if ((fp = fopen(fileName, "w")) == NULL) { + fprintf(stderr, "Couldn't open <%s> for write\n", fileName); + return KAT_FILE_OPEN_ERROR; + } + + for (unsigned long long mlen = 0; + (mlen <= MAX_MESSAGE_LENGTH) && (ret_val == KAT_SUCCESS); mlen++) { + for (unsigned long long adlen = 0; adlen <= MAX_ASSOCIATED_DATA_LENGTH; + adlen++) { + fprintf(fp, "Count = %d\n", count++); + + fprint_bstr(fp, "Key = ", key, CRYPTO_KEYBYTES); + + fprint_bstr(fp, "Nonce = ", nonce, CRYPTO_NPUBBYTES); + + fprint_bstr(fp, "PT = ", msg, mlen); + + fprint_bstr(fp, "AD = ", ad, adlen); + + if ((func_ret = crypto_aead_encrypt(ct, &clen, msg, mlen, ad, adlen, NULL, + nonce, key)) != 0) { + fprintf(fp, "crypto_aead_encrypt returned <%d>\n", func_ret); + ret_val = KAT_CRYPTO_FAILURE; + break; + } + + fprint_bstr(fp, "CT = ", ct, clen); + + fprintf(fp, "\n"); + + if ((func_ret = crypto_aead_decrypt(msg2, &mlen2, NULL, ct, clen, ad, + adlen, nonce, key)) != 0) { + fprintf(fp, "crypto_aead_decrypt returned <%d>\n", func_ret); + ret_val = KAT_CRYPTO_FAILURE; + break; + } + + if (mlen != mlen2) { + fprintf(fp, + "crypto_aead_decrypt returned bad 'mlen': Got <%" PRIu64 + ">, expected <%" PRIu64 ">\n", + mlen2, mlen); + ret_val = KAT_CRYPTO_FAILURE; + break; + } + + if (memcmp(msg, msg2, mlen)) { + fprintf(fp, "crypto_aead_decrypt did not recover the plaintext\n"); + ret_val = KAT_CRYPTO_FAILURE; + break; + } + } + } + + fclose(fp); + + return ret_val; +} + +void fprint_bstr(FILE *fp, const char *label, const unsigned char *data, + unsigned long long length) { + fprintf(fp, "%s", label); + + for (unsigned long long i = 0; i < length; i++) fprintf(fp, "%02X", data[i]); + + fprintf(fp, "\n"); +} + +void 
init_buffer(unsigned char *buffer, unsigned long long numbytes) { + for (unsigned long long i = 0; i < numbytes; i++) + buffer[i] = (unsigned char)i; +} diff --git a/ascon/Implementations/tests/genkat_hash.c b/ascon/Implementations/tests/genkat_hash.c new file mode 100644 index 0000000..218db5e --- /dev/null +++ b/ascon/Implementations/tests/genkat_hash.c @@ -0,0 +1,120 @@ +// +// NIST-developed software is provided by NIST as a public service. +// You may use, copy and distribute copies of the software in any medium, +// provided that you keep intact this entire notice. You may improve, +// modify and create derivative works of the software or any portion of +// the software, and you may copy and distribute such modifications or +// works. Modified works should carry a notice stating that you changed +// the software and should note the date and nature of any such change. +// Please explicitly acknowledge the National Institute of Standards and +// Technology as the source of the software. +// +// NIST-developed software is expressly provided "AS IS." NIST MAKES NO +// WARRANTY OF ANY KIND, EXPRESS, IMPLIED, IN FACT OR ARISING BY OPERATION +// OF LAW, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT AND DATA +// ACCURACY. NIST NEITHER REPRESENTS NOR WARRANTS THAT THE OPERATION OF THE +// SOFTWARE WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT ANY DEFECTS WILL BE +// CORRECTED. NIST DOES NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE +// USE OF THE SOFTWARE OR THE RESULTS THEREOF, INCLUDING BUT NOT LIMITED TO THE +// CORRECTNESS, ACCURACY, RELIABILITY, OR USEFULNESS OF THE SOFTWARE. 
+// +// You are solely responsible for determining the appropriateness of using and +// distributing the software and you assume all risks associated with its use, +// including but not limited to the risks and costs of program errors, +// compliance with applicable laws, damage to or loss of data, programs or +// equipment, and the unavailability or interruption of operation. This software +// is not intended to be used in any situation where a failure could cause risk +// of injury or damage to property. The software developed by NIST employees is +// not subject to copyright protection within the United States. +// + +// disable deprecation for sprintf and fopen +#ifdef _MSC_VER +#define _CRT_SECURE_NO_WARNINGS +#endif + +#include +#include + +#include "api.h" +#include "crypto_hash.h" + +#define KAT_SUCCESS 0 +#define KAT_FILE_OPEN_ERROR -1 +#define KAT_DATA_ERROR -3 +#define KAT_CRYPTO_FAILURE -4 + +#define MAX_FILE_NAME 256 +#define MAX_MESSAGE_LENGTH 1024 + +void init_buffer(unsigned char *buffer, unsigned long long numbytes); + +void fprint_bstr(FILE *fp, const char *label, const unsigned char *data, + unsigned long long length); + +int generate_test_vectors(); + +int main() { + int ret = generate_test_vectors(); + + if (ret != KAT_SUCCESS) { + fprintf(stderr, "test vector generation failed with code %d\n", ret); + } + + return ret; +} + +int generate_test_vectors() { + FILE *fp; + char fileName[MAX_FILE_NAME]; + unsigned char msg[MAX_MESSAGE_LENGTH]; + unsigned char digest[CRYPTO_BYTES]; + int ret_val = KAT_SUCCESS; + int count = 1; + + init_buffer(msg, sizeof(msg)); + + sprintf(fileName, "LWC_HASH_KAT_%d.txt", (CRYPTO_BYTES * 8)); + + if ((fp = fopen(fileName, "w")) == NULL) { + fprintf(stderr, "Couldn't open <%s> for write\n", fileName); + return KAT_FILE_OPEN_ERROR; + } + + for (unsigned long long mlen = 0; mlen <= MAX_MESSAGE_LENGTH; mlen++) { + fprintf(fp, "Count = %d\n", count++); + + fprint_bstr(fp, "Msg = ", msg, mlen); + + ret_val = 
crypto_hash(digest, msg, mlen); + + if (ret_val != 0) { + fprintf(fp, "crypto_hash returned <%d>\n", ret_val); + ret_val = KAT_CRYPTO_FAILURE; + break; + } + + fprint_bstr(fp, "MD = ", digest, CRYPTO_BYTES); + + fprintf(fp, "\n"); + } + + fclose(fp); + + return ret_val; +} + +void fprint_bstr(FILE *fp, const char *label, const unsigned char *data, + unsigned long long length) { + fprintf(fp, "%s", label); + + for (unsigned long long i = 0; i < length; i++) fprintf(fp, "%02X", data[i]); + + fprintf(fp, "\n"); +} + +void init_buffer(unsigned char *buffer, unsigned long long numbytes) { + for (unsigned long long i = 0; i < numbytes; i++) + buffer[i] = (unsigned char)i; +} diff --git a/ascon/Implementations/tests/getcycles.c b/ascon/Implementations/tests/getcycles.c new file mode 100644 index 0000000..08c4daf --- /dev/null +++ b/ascon/Implementations/tests/getcycles.c @@ -0,0 +1,154 @@ +#include +#include + +#include "api.h" +#if defined(CRYPTO_AEAD) +#include "crypto_aead.h" +#elif defined(CRYPTO_HASH) +#include "crypto_hash.h" +#endif + +#if !defined(__arm__) && !defined(_M_ARM) +#ifndef NDEBUG +#pragma message("Using RDTSC to count cycles") +#endif +#ifdef _MSC_VER +#include +#define ALIGN(x) +#else +#include +#define ALIGN(x) __attribute__((aligned(x))) +#endif +#define init_cpucycles() +#define cpucycles(cycles) cycles = __rdtsc() +//#define cpucycles(cycles) cycles = __rdtscp(&tmp) +#endif + +#if defined(__ARM_ARCH_6__) || __ARM_ARCH == 6 || _M_ARM == 6 +#define ALIGN(x) __attribute__((aligned(x))) +#ifndef NDEBUG +#pragma message("Using ARMv6 PMU to count cycles") +#endif +#define init_cpucycles() \ + __asm__ __volatile__("mcr p15, 0, %0, c15, c12, 0" ::"r"(1)) +#define cpucycles(cycles) \ + __asm__ __volatile__("mrc p15, 0, %0, c15, c12, 1" : "=r"(cycles)) +#elif defined(__arm__) || defined(_M_ARM) +#define ALIGN(x) __attribute__((aligned(x))) +#ifndef NDEBUG +#pragma message("Using ARMv7 PMU to count cycles") +#endif +#define init_cpucycles() \ + __asm__ 
__volatile__("mcr p15, 0, %0, c9, c12, 0" ::"r"(17)); \ + __asm__ __volatile__("mcr p15, 0, %0, c9, c12, 1" ::"r"(0x8000000f)); \ + __asm__ __volatile__("mcr p15, 0, %0, c9, c12, 3" ::"r"(0x8000000f)) +#define cpucycles(cycles) \ + __asm__ __volatile__("mrc p15, 0, %0, c9, c13, 0" : "=r"(cycles)) +#endif + +#define NUM_RUNS 16 +#define NUM_BYTES 32768 +#define MAX_LEN 32768 +#define NUM_MLENS 7 + +unsigned long long mlens[] = {1, 8, 16, 32, 64, 1536, 32768}; +unsigned char ALIGN(16) m[MAX_LEN]; +#if defined(CRYPTO_AEAD) +unsigned long long alen = 0; +unsigned long long clen = 0; +unsigned char ALIGN(16) a[MAX_LEN]; +unsigned char ALIGN(16) c[MAX_LEN + CRYPTO_ABYTES]; +unsigned char ALIGN(16) nsec[CRYPTO_NSECBYTES ? CRYPTO_NSECBYTES : 1]; +unsigned char ALIGN(16) npub[CRYPTO_NPUBBYTES ? CRYPTO_NPUBBYTES : 1]; +unsigned char ALIGN(16) k[CRYPTO_KEYBYTES]; +#elif defined(CRYPTO_HASH) +unsigned char ALIGN(16) h[CRYPTO_BYTES]; +#endif + +unsigned long long cycles[NUM_MLENS][NUM_RUNS * 2]; +unsigned int tmp; + +void init_input() { + int i; + for (i = 0; i < MAX_LEN; ++i) m[i] = rand(); +#if defined(CRYPTO_AEAD) + for (i = 0; i < MAX_LEN; ++i) a[i] = rand(); + for (i = 0; i < CRYPTO_KEYBYTES; ++i) k[i] = rand(); + for (i = 0; i < CRYPTO_NPUBBYTES; ++i) npub[i] = rand(); +#endif +} + +unsigned long long measure(unsigned long long mlen) { + unsigned long long NREPS = NUM_BYTES / mlen; + unsigned long long i; +#if defined(__arm__) || defined(_M_ARM) + unsigned int before, after; +#else + unsigned long long before, after; +#endif + init_input(); + cpucycles(before); + for (i = 0; i < NREPS; ++i) +#if defined(CRYPTO_AEAD) + crypto_aead_encrypt(c, &clen, m, mlen, a, alen, nsec, npub, k); +#elif defined(CRYPTO_HASH) + crypto_hash(h, m, mlen); +#endif + cpucycles(after); + return after - before; +} + +int compare_uint64(const void* first, const void* second) { + const unsigned long long* ia = (const unsigned long long*)first; + const unsigned long long* ib = (const unsigned long 
long*)second; + if (*ia > *ib) return 1; + if (*ia < *ib) return -1; + return 0; +} + +int main(int argc, char* argv[]) { + unsigned long long i, j; + double factor = 1.0; + if (argc == 2) factor = atof(argv[1]); + + init_cpucycles(); + + for (i = 0; i < NUM_MLENS; ++i) { + for (j = 0; j < NUM_RUNS; ++j) cycles[i][j] = measure(mlens[i]); + qsort(cycles[i], NUM_RUNS, sizeof(unsigned long long), &compare_uint64); + } + + printf("\nsorted cycles:\n"); + for (i = 0; i < NUM_MLENS; ++i) { + unsigned long long NREPS = NUM_BYTES / mlens[i]; + printf("%5d: ", (int)mlens[i]); + for (j = 0; j < NUM_RUNS; ++j) printf("%d ", (int)(cycles[i][j] / NREPS)); + printf("\n"); + } + + printf("\ncycles per byte (min,median):\n"); + for (i = 0; i < NUM_MLENS; ++i) { + unsigned long long NREPS = NUM_BYTES / mlens[i]; + unsigned long long bytes = mlens[i] * NREPS; + printf("%5d: %6.1f %6.1f\n", (int)mlens[i], + factor * cycles[i][0] / bytes + 0.05, + factor * cycles[i][NUM_RUNS / 2] / bytes + 0.05); + } + printf("\n"); + + for (i = 0; i < NUM_MLENS; ++i) printf("| %5d ", (int)mlens[i]); + printf("|\n"); + for (i = 0; i < NUM_MLENS; ++i) printf("|------:"); + printf("|\n"); + for (i = 0; i < NUM_MLENS; ++i) { + unsigned long long NREPS = NUM_BYTES / mlens[i]; + unsigned long long bytes = mlens[i] * NREPS; + if (mlens[i] <= 32) + printf("| %5.0f ", factor * cycles[i][0] / bytes + 0.5); + else + printf("| %5.1f ", factor * cycles[i][0] / bytes + 0.05); + } + printf("|\n"); + + return 0; +}