From: Alexandre Julliard <julliard@winehq.org> --- configure | 32 +- configure.ac | 2 + libs/symcrypt/LICENSE | 21 + libs/symcrypt/Makefile.in | 129 + libs/symcrypt/inc/C_asm_shared.inc | 74 + libs/symcrypt/inc/buildInfo.h | 8 + libs/symcrypt/inc/symcrypt.h | 10814 ++++++++++++++++ libs/symcrypt/inc/symcrypt_internal.h | 3768 ++++++ .../symcrypt/inc/symcrypt_internal_shared.inc | 33 + libs/symcrypt/inc/symcrypt_low_level.h | 3137 +++++ libs/symcrypt/lib/3des.c | 831 ++ libs/symcrypt/lib/AesTables.c | 899 ++ libs/symcrypt/lib/DesTables.c | 280 + libs/symcrypt/lib/FatalIntercept.c | 23 + libs/symcrypt/lib/IEEE802_11SaeCustom.c | 1585 +++ libs/symcrypt/lib/ScsTable.c | 388 + libs/symcrypt/lib/a_dispatch.c | 1028 ++ libs/symcrypt/lib/aes-asm.c | 46 + libs/symcrypt/lib/aes-c.c | 468 + libs/symcrypt/lib/aes-default-bc.c | 92 + libs/symcrypt/lib/aes-default.c | 872 ++ libs/symcrypt/lib/aes-key.c | 437 + libs/symcrypt/lib/aes-neon.c | 1889 +++ libs/symcrypt/lib/aes-pattern.c | 348 + libs/symcrypt/lib/aes-xmm.c | 1792 +++ libs/symcrypt/lib/aes-ymm.c | 793 ++ libs/symcrypt/lib/aesCtrDrbg.c | 986 ++ libs/symcrypt/lib/aescmac.c | 258 + libs/symcrypt/lib/aeskw.c | 457 + libs/symcrypt/lib/blockciphermodes.c | 470 + libs/symcrypt/lib/ccm.c | 634 + libs/symcrypt/lib/chacha20.c | 267 + libs/symcrypt/lib/chacha20_poly1305.c | 257 + libs/symcrypt/lib/cpuid.c | 419 + libs/symcrypt/lib/cpuid_um.c | 131 + libs/symcrypt/lib/crt.c | 215 + libs/symcrypt/lib/cshake_pattern.c | 152 + libs/symcrypt/lib/desx.c | 131 + libs/symcrypt/lib/dh.c | 141 + libs/symcrypt/lib/dl_internal_groups.c | 922 ++ libs/symcrypt/lib/dlgroup.c | 2016 +++ libs/symcrypt/lib/dlkey.c | 921 ++ libs/symcrypt/lib/dsa.c | 695 + libs/symcrypt/lib/ec_dh.c | 157 + libs/symcrypt/lib/ec_dispatch.c | 300 + libs/symcrypt/lib/ec_dsa.c | 694 + libs/symcrypt/lib/ec_internal_curve_params.c | 597 + libs/symcrypt/lib/ec_internal_curves.c | 79 + libs/symcrypt/lib/ec_montgomery.c | 443 + libs/symcrypt/lib/ec_mul.c | 571 + 
libs/symcrypt/lib/ec_short_weierstrass.c | 935 ++ libs/symcrypt/lib/ec_twisted_edwards.c | 575 + libs/symcrypt/lib/eckey.c | 996 ++ libs/symcrypt/lib/ecpoint.c | 785 ++ libs/symcrypt/lib/ecurve.c | 771 ++ libs/symcrypt/lib/env_windowsUserModeWin8_1.c | 187 + libs/symcrypt/lib/equal.c | 48 + libs/symcrypt/lib/fdef_general.c | 1550 +++ libs/symcrypt/lib/fdef_int.c | 1321 ++ libs/symcrypt/lib/fdef_mod.c | 1731 +++ libs/symcrypt/lib/gcm.c | 902 ++ libs/symcrypt/lib/gen_int.c | 368 + libs/symcrypt/lib/ghash.c | 951 ++ libs/symcrypt/lib/ghash_definitions.h | 472 + libs/symcrypt/lib/hash.c | 216 + libs/symcrypt/lib/hash_buffer_pattern.c | 75 + libs/symcrypt/lib/hash_pattern.c | 39 + libs/symcrypt/lib/hkdf.c | 229 + libs/symcrypt/lib/hmac.c | 195 + libs/symcrypt/lib/hmac_pattern.c | 197 + libs/symcrypt/lib/hmacmd5.c | 56 + libs/symcrypt/lib/hmacsha1.c | 65 + libs/symcrypt/lib/hmacsha224.c | 62 + libs/symcrypt/lib/hmacsha256.c | 60 + libs/symcrypt/lib/hmacsha384.c | 59 + libs/symcrypt/lib/hmacsha3_224.c | 122 + libs/symcrypt/lib/hmacsha3_256.c | 122 + libs/symcrypt/lib/hmacsha3_384.c | 124 + libs/symcrypt/lib/hmacsha3_512.c | 126 + libs/symcrypt/lib/hmacsha512.c | 59 + libs/symcrypt/lib/hmacsha512_224.c | 62 + libs/symcrypt/lib/hmacsha512_256.c | 62 + libs/symcrypt/lib/kmac.c | 123 + libs/symcrypt/lib/kmac_pattern.c | 218 + libs/symcrypt/lib/libmain.c | 539 + libs/symcrypt/lib/lms.c | 1162 ++ libs/symcrypt/lib/marvin32.c | 331 + libs/symcrypt/lib/md2.c | 307 + libs/symcrypt/lib/md4.c | 425 + libs/symcrypt/lib/md5.c | 503 + libs/symcrypt/lib/mldsa.c | 1096 ++ libs/symcrypt/lib/mldsa_primitives.c | 2410 ++++ libs/symcrypt/lib/mlkem.c | 1164 ++ libs/symcrypt/lib/mlkem_primitives.c | 1442 +++ libs/symcrypt/lib/modexp.c | 510 + libs/symcrypt/lib/paddingPkcs7.c | 167 + libs/symcrypt/lib/parhash.c | 517 + libs/symcrypt/lib/pbkdf2.c | 126 + libs/symcrypt/lib/pbkdf2_hmacsha1.c | 41 + libs/symcrypt/lib/pbkdf2_hmacsha256.c | 41 + libs/symcrypt/lib/poly1305.c | 468 + 
libs/symcrypt/lib/precomp.h | 26 + libs/symcrypt/lib/primes.c | 306 + libs/symcrypt/lib/rc2.c | 438 + libs/symcrypt/lib/rc4.c | 156 + libs/symcrypt/lib/rdrand.c | 172 + libs/symcrypt/lib/rdseed.c | 135 + libs/symcrypt/lib/recoding.c | 209 + libs/symcrypt/lib/rsa_enc.c | 1531 +++ libs/symcrypt/lib/rsa_padding.c | 1218 ++ libs/symcrypt/lib/rsakey.c | 1631 +++ libs/symcrypt/lib/sc_lib.h | 5161 ++++++++ libs/symcrypt/lib/sc_lib_mldsa.h | 1081 ++ libs/symcrypt/lib/sc_lib_mlkem.h | 468 + libs/symcrypt/lib/scsTools.c | 367 + libs/symcrypt/lib/selftest.c | 17 + libs/symcrypt/lib/session.c | 377 + libs/symcrypt/lib/sha1.c | 472 + libs/symcrypt/lib/sha256-xmm.c | 354 + libs/symcrypt/lib/sha256-ymm.c | 441 + libs/symcrypt/lib/sha256.c | 1884 +++ libs/symcrypt/lib/sha256Par-ymm.c | 269 + libs/symcrypt/lib/sha256Par.c | 1243 ++ libs/symcrypt/lib/sha3.c | 619 + libs/symcrypt/lib/sha3_224.c | 141 + libs/symcrypt/lib/sha3_256.c | 141 + libs/symcrypt/lib/sha3_384.c | 143 + libs/symcrypt/lib/sha3_512.c | 144 + libs/symcrypt/lib/sha512-ymm.c | 801 ++ libs/symcrypt/lib/sha512.c | 1715 +++ libs/symcrypt/lib/sha512Par-ymm.c | 243 + libs/symcrypt/lib/sha512Par.c | 798 ++ libs/symcrypt/lib/shake.c | 337 + libs/symcrypt/lib/shake_pattern.c | 111 + libs/symcrypt/lib/smallPrimes32.h | 29 + libs/symcrypt/lib/sp800_108.c | 143 + libs/symcrypt/lib/sp800_108_hmacsha1.c | 39 + libs/symcrypt/lib/sp800_108_hmacsha256.c | 39 + libs/symcrypt/lib/sp800_108_hmacsha512.c | 66 + libs/symcrypt/lib/srtp_kdf.c | 175 + libs/symcrypt/lib/ssh_kdf.c | 122 + libs/symcrypt/lib/ssh_kdf_sha256.c | 65 + libs/symcrypt/lib/ssh_kdf_sha512.c | 70 + libs/symcrypt/lib/sskdf.c | 266 + libs/symcrypt/lib/tlsCbcVerify.c | 458 + libs/symcrypt/lib/tlsprf.c | 569 + libs/symcrypt/lib/xmss.c | 2129 +++ libs/symcrypt/lib/xtsaes.c | 727 ++ libs/symcrypt/lib/xtsaes_definitions.h | 176 + libs/symcrypt/lib/xtsaes_pattern.c | 90 + tools/make_makefiles | 5 + tools/makedep.c | 1 + 152 files changed, 95131 insertions(+), 1 deletion(-) 
create mode 100644 libs/symcrypt/LICENSE create mode 100644 libs/symcrypt/Makefile.in create mode 100644 libs/symcrypt/inc/C_asm_shared.inc create mode 100644 libs/symcrypt/inc/buildInfo.h create mode 100644 libs/symcrypt/inc/symcrypt.h create mode 100644 libs/symcrypt/inc/symcrypt_internal.h create mode 100644 libs/symcrypt/inc/symcrypt_internal_shared.inc create mode 100644 libs/symcrypt/inc/symcrypt_low_level.h create mode 100644 libs/symcrypt/lib/3des.c create mode 100644 libs/symcrypt/lib/AesTables.c create mode 100644 libs/symcrypt/lib/DesTables.c create mode 100644 libs/symcrypt/lib/FatalIntercept.c create mode 100644 libs/symcrypt/lib/IEEE802_11SaeCustom.c create mode 100644 libs/symcrypt/lib/ScsTable.c create mode 100644 libs/symcrypt/lib/a_dispatch.c create mode 100644 libs/symcrypt/lib/aes-asm.c create mode 100644 libs/symcrypt/lib/aes-c.c create mode 100644 libs/symcrypt/lib/aes-default-bc.c create mode 100644 libs/symcrypt/lib/aes-default.c create mode 100644 libs/symcrypt/lib/aes-key.c create mode 100644 libs/symcrypt/lib/aes-neon.c create mode 100644 libs/symcrypt/lib/aes-pattern.c create mode 100644 libs/symcrypt/lib/aes-xmm.c create mode 100644 libs/symcrypt/lib/aes-ymm.c create mode 100644 libs/symcrypt/lib/aesCtrDrbg.c create mode 100644 libs/symcrypt/lib/aescmac.c create mode 100644 libs/symcrypt/lib/aeskw.c create mode 100644 libs/symcrypt/lib/blockciphermodes.c create mode 100644 libs/symcrypt/lib/ccm.c create mode 100644 libs/symcrypt/lib/chacha20.c create mode 100644 libs/symcrypt/lib/chacha20_poly1305.c create mode 100644 libs/symcrypt/lib/cpuid.c create mode 100644 libs/symcrypt/lib/cpuid_um.c create mode 100644 libs/symcrypt/lib/crt.c create mode 100644 libs/symcrypt/lib/cshake_pattern.c create mode 100644 libs/symcrypt/lib/desx.c create mode 100644 libs/symcrypt/lib/dh.c create mode 100644 libs/symcrypt/lib/dl_internal_groups.c create mode 100644 libs/symcrypt/lib/dlgroup.c create mode 100644 libs/symcrypt/lib/dlkey.c create mode 100644 
libs/symcrypt/lib/dsa.c create mode 100644 libs/symcrypt/lib/ec_dh.c create mode 100644 libs/symcrypt/lib/ec_dispatch.c create mode 100644 libs/symcrypt/lib/ec_dsa.c create mode 100644 libs/symcrypt/lib/ec_internal_curve_params.c create mode 100644 libs/symcrypt/lib/ec_internal_curves.c create mode 100644 libs/symcrypt/lib/ec_montgomery.c create mode 100644 libs/symcrypt/lib/ec_mul.c create mode 100644 libs/symcrypt/lib/ec_short_weierstrass.c create mode 100644 libs/symcrypt/lib/ec_twisted_edwards.c create mode 100644 libs/symcrypt/lib/eckey.c create mode 100644 libs/symcrypt/lib/ecpoint.c create mode 100644 libs/symcrypt/lib/ecurve.c create mode 100644 libs/symcrypt/lib/env_windowsUserModeWin8_1.c create mode 100644 libs/symcrypt/lib/equal.c create mode 100644 libs/symcrypt/lib/fdef_general.c create mode 100644 libs/symcrypt/lib/fdef_int.c create mode 100644 libs/symcrypt/lib/fdef_mod.c create mode 100644 libs/symcrypt/lib/gcm.c create mode 100644 libs/symcrypt/lib/gen_int.c create mode 100644 libs/symcrypt/lib/ghash.c create mode 100644 libs/symcrypt/lib/ghash_definitions.h create mode 100644 libs/symcrypt/lib/hash.c create mode 100644 libs/symcrypt/lib/hash_buffer_pattern.c create mode 100644 libs/symcrypt/lib/hash_pattern.c create mode 100644 libs/symcrypt/lib/hkdf.c create mode 100644 libs/symcrypt/lib/hmac.c create mode 100644 libs/symcrypt/lib/hmac_pattern.c create mode 100644 libs/symcrypt/lib/hmacmd5.c create mode 100644 libs/symcrypt/lib/hmacsha1.c create mode 100644 libs/symcrypt/lib/hmacsha224.c create mode 100644 libs/symcrypt/lib/hmacsha256.c create mode 100644 libs/symcrypt/lib/hmacsha384.c create mode 100644 libs/symcrypt/lib/hmacsha3_224.c create mode 100644 libs/symcrypt/lib/hmacsha3_256.c create mode 100644 libs/symcrypt/lib/hmacsha3_384.c create mode 100644 libs/symcrypt/lib/hmacsha3_512.c create mode 100644 libs/symcrypt/lib/hmacsha512.c create mode 100644 libs/symcrypt/lib/hmacsha512_224.c create mode 100644 libs/symcrypt/lib/hmacsha512_256.c 
create mode 100644 libs/symcrypt/lib/kmac.c create mode 100644 libs/symcrypt/lib/kmac_pattern.c create mode 100644 libs/symcrypt/lib/libmain.c create mode 100644 libs/symcrypt/lib/lms.c create mode 100644 libs/symcrypt/lib/marvin32.c create mode 100644 libs/symcrypt/lib/md2.c create mode 100644 libs/symcrypt/lib/md4.c create mode 100644 libs/symcrypt/lib/md5.c create mode 100644 libs/symcrypt/lib/mldsa.c create mode 100644 libs/symcrypt/lib/mldsa_primitives.c create mode 100644 libs/symcrypt/lib/mlkem.c create mode 100644 libs/symcrypt/lib/mlkem_primitives.c create mode 100644 libs/symcrypt/lib/modexp.c create mode 100644 libs/symcrypt/lib/paddingPkcs7.c create mode 100644 libs/symcrypt/lib/parhash.c create mode 100644 libs/symcrypt/lib/pbkdf2.c create mode 100644 libs/symcrypt/lib/pbkdf2_hmacsha1.c create mode 100644 libs/symcrypt/lib/pbkdf2_hmacsha256.c create mode 100644 libs/symcrypt/lib/poly1305.c create mode 100644 libs/symcrypt/lib/precomp.h create mode 100644 libs/symcrypt/lib/primes.c create mode 100644 libs/symcrypt/lib/rc2.c create mode 100644 libs/symcrypt/lib/rc4.c create mode 100644 libs/symcrypt/lib/rdrand.c create mode 100644 libs/symcrypt/lib/rdseed.c create mode 100644 libs/symcrypt/lib/recoding.c create mode 100644 libs/symcrypt/lib/rsa_enc.c create mode 100644 libs/symcrypt/lib/rsa_padding.c create mode 100644 libs/symcrypt/lib/rsakey.c create mode 100644 libs/symcrypt/lib/sc_lib.h create mode 100644 libs/symcrypt/lib/sc_lib_mldsa.h create mode 100644 libs/symcrypt/lib/sc_lib_mlkem.h create mode 100644 libs/symcrypt/lib/scsTools.c create mode 100644 libs/symcrypt/lib/selftest.c create mode 100644 libs/symcrypt/lib/session.c create mode 100644 libs/symcrypt/lib/sha1.c create mode 100644 libs/symcrypt/lib/sha256-xmm.c create mode 100644 libs/symcrypt/lib/sha256-ymm.c create mode 100644 libs/symcrypt/lib/sha256.c create mode 100644 libs/symcrypt/lib/sha256Par-ymm.c create mode 100644 libs/symcrypt/lib/sha256Par.c create mode 100644 
libs/symcrypt/lib/sha3.c create mode 100644 libs/symcrypt/lib/sha3_224.c create mode 100644 libs/symcrypt/lib/sha3_256.c create mode 100644 libs/symcrypt/lib/sha3_384.c create mode 100644 libs/symcrypt/lib/sha3_512.c create mode 100644 libs/symcrypt/lib/sha512-ymm.c create mode 100644 libs/symcrypt/lib/sha512.c create mode 100644 libs/symcrypt/lib/sha512Par-ymm.c create mode 100644 libs/symcrypt/lib/sha512Par.c create mode 100644 libs/symcrypt/lib/shake.c create mode 100644 libs/symcrypt/lib/shake_pattern.c create mode 100644 libs/symcrypt/lib/smallPrimes32.h create mode 100644 libs/symcrypt/lib/sp800_108.c create mode 100644 libs/symcrypt/lib/sp800_108_hmacsha1.c create mode 100644 libs/symcrypt/lib/sp800_108_hmacsha256.c create mode 100644 libs/symcrypt/lib/sp800_108_hmacsha512.c create mode 100644 libs/symcrypt/lib/srtp_kdf.c create mode 100644 libs/symcrypt/lib/ssh_kdf.c create mode 100644 libs/symcrypt/lib/ssh_kdf_sha256.c create mode 100644 libs/symcrypt/lib/ssh_kdf_sha512.c create mode 100644 libs/symcrypt/lib/sskdf.c create mode 100644 libs/symcrypt/lib/tlsCbcVerify.c create mode 100644 libs/symcrypt/lib/tlsprf.c create mode 100644 libs/symcrypt/lib/xmss.c create mode 100644 libs/symcrypt/lib/xtsaes.c create mode 100644 libs/symcrypt/lib/xtsaes_definitions.h create mode 100644 libs/symcrypt/lib/xtsaes_pattern.c diff --git a/configure b/configure index 9ba2a3fa264..a47cc261172 100755 --- a/configure +++ b/configure @@ -728,6 +728,8 @@ TOMCRYPT_PE_LIBS TOMCRYPT_PE_CFLAGS TIFF_PE_LIBS TIFF_PE_CFLAGS +SYMCRYPT_PE_LIBS +SYMCRYPT_PE_CFLAGS SQLITE3_PE_LIBS SQLITE3_PE_CFLAGS PNG_PE_LIBS @@ -1707,6 +1709,7 @@ enable_sqlite3 enable_strmbase enable_strmiids enable_strsafe +enable_symcrypt enable_tiff enable_tomcrypt enable_unwind @@ -1891,6 +1894,8 @@ PNG_PE_CFLAGS PNG_PE_LIBS SQLITE3_PE_CFLAGS SQLITE3_PE_LIBS +SYMCRYPT_PE_CFLAGS +SYMCRYPT_PE_LIBS TIFF_PE_CFLAGS TIFF_PE_LIBS TOMCRYPT_PE_CFLAGS @@ -2739,6 +2744,11 @@ Some influential environment variables: version 
SQLITE3_PE_LIBS Linker flags for the PE sqlite3, overriding the bundled version + SYMCRYPT_PE_CFLAGS + C compiler flags for the PE symcrypt, overriding the bundled + version + SYMCRYPT_PE_LIBS + Linker flags for the PE symcrypt, overriding the bundled version TIFF_PE_CFLAGS C compiler flags for the PE tiff, overriding the bundled version TIFF_PE_LIBS @@ -14883,6 +14893,23 @@ fi printf "%s\n" "$as_me:${as_lineno-$LINENO}: sqlite3 cflags: $SQLITE3_PE_CFLAGS" >&5 printf "%s\n" "$as_me:${as_lineno-$LINENO}: sqlite3 libs: $SQLITE3_PE_LIBS" >&5 +if ${SYMCRYPT_PE_LIBS:+false} : +then : + SYMCRYPT_PE_LIBS=symcrypt + if ${SYMCRYPT_PE_CFLAGS:+false} : +then : + SYMCRYPT_PE_CFLAGS="-I\$(top_srcdir)/libs/symcrypt/inc" +else case e in #( + e) enable_symcrypt=no ;; +esac +fi +else case e in #( + e) enable_symcrypt=no ;; +esac +fi +printf "%s\n" "$as_me:${as_lineno-$LINENO}: symcrypt cflags: $SYMCRYPT_PE_CFLAGS" >&5 +printf "%s\n" "$as_me:${as_lineno-$LINENO}: symcrypt libs: $SYMCRYPT_PE_LIBS" >&5 + if ${TIFF_PE_LIBS:+false} : then : TIFF_PE_LIBS="tiff \$(ZLIB_PE_LIBS)" @@ -24034,6 +24061,7 @@ wine_fn_config_makefile libs/sqlite3 enable_sqlite3 wine_fn_config_makefile libs/strmbase enable_strmbase wine_fn_config_makefile libs/strmiids enable_strmiids wine_fn_config_makefile libs/strsafe enable_strsafe +wine_fn_config_makefile libs/symcrypt enable_symcrypt wine_fn_config_makefile libs/tiff enable_tiff wine_fn_config_makefile libs/tomcrypt enable_tomcrypt wine_fn_config_makefile libs/unwind enable_unwind @@ -24262,7 +24290,7 @@ dlls/wineandroid.drv/wine-debug.apk: dlls/wineandroid.drv/build.gradle ${wine_sr mv dlls/wineandroid.drv/build/outputs/apk/debug/wine-debug.apk \$@" -EXTERNAL_SUBDIRS="libs/capstone libs/c++ libs/c++abi libs/faudio libs/fluidsynth libs/gsm libs/icucommon libs/icui18n libs/jpeg libs/jxr libs/lcms2 libs/ldap libs/mpg123 libs/musl libs/png libs/sqlite3 libs/tiff libs/tomcrypt libs/unwind libs/vkd3d libs/xml2 libs/xslt libs/zlib libs/compiler-rt" 
+EXTERNAL_SUBDIRS="libs/capstone libs/c++ libs/c++abi libs/faudio libs/fluidsynth libs/gsm libs/icucommon libs/icui18n libs/jpeg libs/jxr libs/lcms2 libs/ldap libs/mpg123 libs/musl libs/png libs/sqlite3 libs/symcrypt libs/tiff libs/tomcrypt libs/unwind libs/vkd3d libs/xml2 libs/xslt libs/zlib libs/compiler-rt" TAGSFLAGS="--langmap='c:+.idl.l.rh,make:(Make*.in)'" @@ -25185,6 +25213,8 @@ PNG_PE_CFLAGS = $PNG_PE_CFLAGS PNG_PE_LIBS = $PNG_PE_LIBS SQLITE3_PE_CFLAGS = $SQLITE3_PE_CFLAGS SQLITE3_PE_LIBS = $SQLITE3_PE_LIBS +SYMCRYPT_PE_CFLAGS = $SYMCRYPT_PE_CFLAGS +SYMCRYPT_PE_LIBS = $SYMCRYPT_PE_LIBS TIFF_PE_CFLAGS = $TIFF_PE_CFLAGS TIFF_PE_LIBS = $TIFF_PE_LIBS TOMCRYPT_PE_CFLAGS = $TOMCRYPT_PE_CFLAGS diff --git a/configure.ac b/configure.ac index 60262f26f68..f4dc1eba3eb 100644 --- a/configure.ac +++ b/configure.ac @@ -1246,6 +1246,7 @@ WINE_EXTLIB_FLAGS(MPG123, mpg123, mpg123, "-I\$(top_srcdir)/libs/mpg123/src/incl WINE_EXTLIB_FLAGS(MUSL, musl, musl) WINE_EXTLIB_FLAGS(PNG, png, "png \$(ZLIB_PE_LIBS)", "-I\$(top_srcdir)/libs/png") WINE_EXTLIB_FLAGS(SQLITE3, sqlite3, sqlite3) +WINE_EXTLIB_FLAGS(SYMCRYPT, symcrypt, symcrypt, "-I\$(top_srcdir)/libs/symcrypt/inc") WINE_EXTLIB_FLAGS(TIFF, tiff, "tiff \$(ZLIB_PE_LIBS)", "-I\$(top_srcdir)/libs/tiff/libtiff") WINE_EXTLIB_FLAGS(TOMCRYPT, tomcrypt, tomcrypt, "-I\$(top_srcdir)/libs/tomcrypt/src/headers -DLTC_NO_PROTOTYPES -DLTC_SOURCE") WINE_EXTLIB_FLAGS(UNWIND, unwind, unwind, "-I\$(top_srcdir)/libs/unwind/include") @@ -3541,6 +3542,7 @@ WINE_CONFIG_MAKEFILE(libs/sqlite3) WINE_CONFIG_MAKEFILE(libs/strmbase) WINE_CONFIG_MAKEFILE(libs/strmiids) WINE_CONFIG_MAKEFILE(libs/strsafe) +WINE_CONFIG_MAKEFILE(libs/symcrypt) WINE_CONFIG_MAKEFILE(libs/tiff) WINE_CONFIG_MAKEFILE(libs/tomcrypt) WINE_CONFIG_MAKEFILE(libs/unwind) diff --git a/libs/symcrypt/LICENSE b/libs/symcrypt/LICENSE new file mode 100644 index 00000000000..8cb179cdb69 --- /dev/null +++ b/libs/symcrypt/LICENSE @@ -0,0 +1,21 @@ +Copyright (c) Microsoft Corporation. 
All rights reserved. + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/libs/symcrypt/Makefile.in b/libs/symcrypt/Makefile.in new file mode 100644 index 00000000000..59150791669 --- /dev/null +++ b/libs/symcrypt/Makefile.in @@ -0,0 +1,129 @@ +STATICLIB = libsymcrypt.a +EXTRAINCL = $(SYMCRYPT_PE_CFLAGS) + +SOURCES = \ + lib/3des.c \ + lib/AesTables.c \ + lib/DesTables.c \ + lib/FatalIntercept.c \ + lib/IEEE802_11SaeCustom.c \ + lib/ScsTable.c \ + lib/a_dispatch.c \ + lib/aes-asm.c \ + lib/aes-c.c \ + lib/aes-default-bc.c \ + lib/aes-default.c \ + lib/aes-key.c \ + lib/aes-neon.c \ + lib/aes-xmm.c \ + lib/aes-ymm.c \ + lib/aesCtrDrbg.c \ + lib/aescmac.c \ + lib/aeskw.c \ + lib/blockciphermodes.c \ + lib/ccm.c \ + lib/chacha20.c \ + lib/chacha20_poly1305.c \ + lib/cpuid.c \ + lib/cpuid_um.c \ + lib/crt.c \ + lib/desx.c \ + lib/dh.c \ + lib/dl_internal_groups.c \ + lib/dlgroup.c \ + lib/dlkey.c \ + lib/dsa.c \ + lib/ec_dh.c \ + lib/ec_dispatch.c \ + lib/ec_dsa.c \ + lib/ec_internal_curve_params.c \ + lib/ec_internal_curves.c \ + lib/ec_montgomery.c \ + lib/ec_mul.c \ + lib/ec_short_weierstrass.c \ + lib/ec_twisted_edwards.c \ + lib/eckey.c \ + lib/ecpoint.c \ + lib/ecurve.c \ + lib/env_windowsUserModeWin8_1.c \ + lib/equal.c \ + lib/fdef_general.c \ + lib/fdef_int.c \ + lib/fdef_mod.c \ + lib/gcm.c \ + lib/gen_int.c \ + lib/ghash.c \ + lib/hash.c \ + lib/hkdf.c \ + lib/hmac.c \ + lib/hmacmd5.c \ + lib/hmacsha1.c \ + lib/hmacsha224.c \ + lib/hmacsha256.c \ + lib/hmacsha384.c \ + lib/hmacsha3_224.c \ + lib/hmacsha3_256.c \ + lib/hmacsha3_384.c \ + lib/hmacsha3_512.c \ + lib/hmacsha512.c \ + lib/hmacsha512_224.c \ + lib/hmacsha512_256.c \ + lib/kmac.c \ + lib/libmain.c \ + lib/lms.c \ + lib/marvin32.c \ + lib/md2.c \ + lib/md4.c \ + lib/md5.c \ + lib/mldsa.c \ + lib/mldsa_primitives.c \ + lib/mlkem.c \ + lib/mlkem_primitives.c \ + lib/modexp.c \ + lib/paddingPkcs7.c \ + lib/parhash.c \ + lib/pbkdf2.c \ + lib/pbkdf2_hmacsha1.c \ + lib/pbkdf2_hmacsha256.c \ + lib/poly1305.c \ + lib/primes.c \ + lib/rc2.c \ + lib/rc4.c \ + 
lib/rdrand.c \ + lib/rdseed.c \ + lib/recoding.c \ + lib/rsa_enc.c \ + lib/rsa_padding.c \ + lib/rsakey.c \ + lib/scsTools.c \ + lib/selftest.c \ + lib/session.c \ + lib/sha1.c \ + lib/sha256-xmm.c \ + lib/sha256-ymm.c \ + lib/sha256.c \ + lib/sha256Par-ymm.c \ + lib/sha256Par.c \ + lib/sha3.c \ + lib/sha3_224.c \ + lib/sha3_256.c \ + lib/sha3_384.c \ + lib/sha3_512.c \ + lib/sha512-ymm.c \ + lib/sha512.c \ + lib/sha512Par-ymm.c \ + lib/sha512Par.c \ + lib/shake.c \ + lib/sp800_108.c \ + lib/sp800_108_hmacsha1.c \ + lib/sp800_108_hmacsha256.c \ + lib/sp800_108_hmacsha512.c \ + lib/srtp_kdf.c \ + lib/ssh_kdf.c \ + lib/ssh_kdf_sha256.c \ + lib/ssh_kdf_sha512.c \ + lib/sskdf.c \ + lib/tlsCbcVerify.c \ + lib/tlsprf.c \ + lib/xmss.c \ + lib/xtsaes.c diff --git a/libs/symcrypt/inc/C_asm_shared.inc b/libs/symcrypt/inc/C_asm_shared.inc new file mode 100644 index 00000000000..25b33560bfb --- /dev/null +++ b/libs/symcrypt/inc/C_asm_shared.inc @@ -0,0 +1,74 @@ +/* + C_asm_shared.inc file to synchronize C and Asm information + Copyright (c) Microsoft Corporation. Licensed under the MIT license. + + This is a file that is included in both C and ASM such that the values are the same on both sides. + We use the C preprocessor to set ASM constants, as we already need to use the C preprocessor for + symcryptasm processing (see scripts/symcryptasm_processor.py). + We use this to define the structure offsets that the ASM code uses. + By having equivalent C constants we can add checks to the C code to ensure they are correct. 
+ +*/ + +#include "symcrypt_internal_shared.inc" + +#if defined(SYMCRYPT_MASM) +#define SET(_variable, _value) _variable EQU _value +#elif defined(SYMCRYPT_GAS) +#define SET(_variable, _value) .set _variable, _value +#else // assume C +#define SET(_variable, _value) const SIZE_T _variable = _value; +#endif + +SET(SymCryptModulusNdigitsOffsetAmd64, 4); +SET(SymCryptModulusInv64OffsetAmd64, 24); +SET(SymCryptModulusValueOffsetAmd64, 128); +SET(SymCryptNegDivisorSingleDigitOffsetAmd64, 256); + +SET(SymCryptModulusNdigitsOffsetX86, 4); +SET(SymCryptModulusInv64OffsetX86, 24); +SET(SymCryptModulusValueOffsetX86, 96); + +SET(SymCryptModulusNdigitsOffsetArm64, 4); +SET(SymCryptModulusInv64OffsetArm64, 24); +SET(SymCryptModulusValueOffsetArm64, 128); + +SET(SymCryptModulusNdigitsOffsetArm, 4); +SET(SymCryptModulusInv64OffsetArm, 24); +SET(SymCryptModulusValueOffsetArm, 96); + +#if !defined(SYMCRYPT_MASM) && !defined(SYMCRYPT_GAS) +// Preserve the definition of SET for use in symcryptasm processing +#undef SET +#endif + +#if SYMCRYPT_CPU_AMD64 +#define SYMCRYPT_CHECK_ASM_OFFSETS \ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusNdigitsOffsetAmd64, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, nDigits ) );\ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusInv64OffsetAmd64, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, inv64 ));\ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusValueOffsetAmd64, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, Divisor.Int.ti.fdef.uint32 ));\ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptNegDivisorSingleDigitOffsetAmd64, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, Divisor.Int.ti.fdef.uint32 ) + (2*SYMCRYPT_FDEF_DIGIT_SIZE) );\ + +#elif SYMCRYPT_CPU_X86 +#define SYMCRYPT_CHECK_ASM_OFFSETS \ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusNdigitsOffsetX86, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, nDigits ) );\ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusInv64OffsetX86, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, inv64 ));\ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusValueOffsetX86, 
SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, Divisor.Int.ti.fdef.uint32 ));\ + +#elif SYMCRYPT_CPU_ARM64 +#define SYMCRYPT_CHECK_ASM_OFFSETS \ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusNdigitsOffsetArm64, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, nDigits ) );\ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusInv64OffsetArm64, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, inv64 ));\ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusValueOffsetArm64, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, Divisor.Int.ti.fdef.uint32 ));\ + +#elif SYMCRYPT_CPU_ARM +#define SYMCRYPT_CHECK_ASM_OFFSETS \ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusNdigitsOffsetArm, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, nDigits ) );\ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusInv64OffsetArm, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, inv64 ));\ + SYMCRYPT_CHECK_ASM_OFFSET( SymCryptModulusValueOffsetArm, SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, Divisor.Int.ti.fdef.uint32 ));\ + +#endif // CPU_* + +#if !defined(SYMCRYPT_CHECK_ASM_OFFSETS) +#define SYMCRYPT_CHECK_ASM_OFFSETS +#endif diff --git a/libs/symcrypt/inc/buildInfo.h b/libs/symcrypt/inc/buildInfo.h new file mode 100644 index 00000000000..c31e04a6420 --- /dev/null +++ b/libs/symcrypt/inc/buildInfo.h @@ -0,0 +1,8 @@ +#include "symcrypt_internal_shared.inc" + +#define _SYMCRYPT_STRING_INT(a) #a +#define _SYMCRYPT_STRING(a) _SYMCRYPT_STRING_INT(a) +#define SYMCRYPT_BUILD_INFO_BRANCH "" +#define SYMCRYPT_BUILD_INFO_COMMIT "2026-03-28T00:56:29+01:00_748c20f1fc48" +#define SYMCRYPT_BUILD_INFO_VERSION _SYMCRYPT_STRING(SYMCRYPT_CODE_VERSION_API) "." _SYMCRYPT_STRING(SYMCRYPT_CODE_VERSION_MINOR) "." _SYMCRYPT_STRING(SYMCRYPT_CODE_VERSION_PATCH) +#define SYMCRYPT_BUILD_INFO_TIMESTAMP "" diff --git a/libs/symcrypt/inc/symcrypt.h b/libs/symcrypt/inc/symcrypt.h new file mode 100644 index 00000000000..b650608d525 --- /dev/null +++ b/libs/symcrypt/inc/symcrypt.h @@ -0,0 +1,10814 @@ +// +// SymCrypt.h +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#pragma once + + +#ifdef __cplusplus +extern "C" { +#endif + +#include "symcrypt_internal_shared.inc" + +#define SYMCRYPT_API_VERSION ((SYMCRYPT_CODE_VERSION_API << 16) | SYMCRYPT_CODE_VERSION_MINOR) + +// +// This is the header file for the SymCrypt library which contains +// implementations of cryptographic algorithms. +// +// All API information is in this file. Information in the +// other include files (symcrypt_internal.h) is subject +// to change at any time. Please use only the information in this file. +// The header file symcrypt_low_level contains low-level API functions that +// are sometimes needed. That API surface is not stable across releases. +// + +; // <-- non-functional semicolon that makes the editor's indent work properly. + +// +// General information about SymCrypt: +// +// +// CPU +// This library is built and tested for: X86, AMD64, ARM, and ARM64. +// +// ENVIRONMENT +// SymCrypt can run in different environments, such as kernel mode, user mode, +// etc. +// In earlier versions of the library, the caller specified the environment by passing a +// pointer to the SymCryptInit function. +// It turns out that that model no longer scales with the use of new extended register sets +// or it introduces too much overhead. +// The current library uses a different model. The user of the library invokes one of the +// environment macros inside a C file in the calling process. +// SymCrypt defines macros for each environment. +// The same mechanism will also be used to select between different implementations of a single +// algorithm. For example, a caller might use +// SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE +// SYMCRYPT_SELECT_SHA256_COMPACT +// to indicate that the environment is kernel mode and the compact SHA-256 implementation is to +// be used. +// There are optimized environments for various Windows use cases. +// +// +// CHECKED BUILDS +// For each CPU, SymCrypt is available in both a checked build and a fre build. 
The +// checked build includes additional error checking which catches the most common +// errors. Please make sure you build a checked version of your binary and test with +// that regularly. +// +// +// MEMORY STRUCTURES +// Most SymCrypt functions do not allocate any memory; all memory is provided by the caller. +// However, callers may not copy, move, or otherwise manipulate the SymCrypt +// data structures. In particular, a memcpy of a SymCrypt data structure is not allowed. +// When necessary SymCrypt provides functions to perform the necessary manipulations. +// If you are missing one, please ask us. +// +// +// MULTI_THREADING +// The routines in this library are multi-thread safe, taking into account the usual +// rules of multiple threads accessing the same data structures. +// Any function that accepts a pointer-to-const argument must be assumed to read the +// corresponding data. If the function accepts a pointer-to-non-const it must be +// assumed to both read and write the data. +// It is safe for two threads to use the same data element as long as both of them +// are only reading from it. For example, an expanded key is typically passed as +// a pointer-to-const to the encryption and decryption routines. Thus, multiple +// threads can perform multiple encryptions/decryptions in parallel using the +// same expanded key. +// +// The normal memory re-order issues apply as well. If one thread initializes a +// data structure and the initialization function returns, it is NOT safe for +// another thread to read the data structure without a suitable memory barrier or +// synchronization primitive. +// +// +// SIDE CHANNELS +// Side channels are ways in which an attacker can receive information about what +// a target process is doing using other aspects than just the input/output behaviour +// of the target. For example, the memory subsystem, CPU load modulation, disk usage, +// and many other aspects can provide side-channels to an attacker. 
+// +// Wherever possible the implementations in SymCrypt have been hardened against side channels. +// The most important rules are that the instruction sequence and the memory addresses +// accessed do not depend on any of the data being processed. +// As a general rule, the actual data being processed is protected, but the +// length of the data (i.e. the number of bytes) is not protected in this way and +// is treated as public information. +// +// The implementations of the following algorithms are NOT side-channel safe: +// - non-AES-NI based AES +// used on CPUs that don't have AES-NI, or in kernel mode on x86 Win8 and below. +// - DES, 3DES, DESX +// - RC4 +// Making these algorithms side-channel safe would incur an overhead that is too large. +// +// +// FATAL ERRORS +// This is a high-performance library with a minimum of error checking. +// Many functions do not return an error code; this avoids the cost of +// having any error checking on the caller's side for error situations that +// can never occur. However, this does assume that the caller is calling +// SymCrypt using a valid calling sequence with proper parameters. +// In some situations this library will detect improper parameters or +// calling sequences. In those situations the library will generate a fatal +// error, which leads to an abrupt termination of the process (bugcheck in +// kernel mode). Exceptional circumstances may also induce fatal errors within +// the library (e.g. a caller-provided buffer causes an access violation when +// it is read, or the library is called without sufficient stack space for the +// requested operation). +// If a fatal error is generated within the library, the internal state of the +// library may be inconsistent (e.g. there may be outstanding memory allocations +// that will never be freed, or a lock may have been taken which will never be +// released). 
Callers should not catch fatal errors and continue executing, as +// there is no guarantee of stability. +// The checked version of the library has additional error checking which detects +// the most common errors. We strongly recommend that callers build and test a +// checked version of their binary to catch these common errors. +// +// +// ALGORITHM SELF TEST +// SymCrypt includes functions that perform simple self-tests on the algorithm +// implementations. These functions are designed to be used for FIPS certification +// of crypto binaries. They should never fail, and they generate a fatal error +// if they do fail. +// If you are not FIPS-certifying your binaries, you can ignore the self test functions. +// +// +// CHANGES FROM RSA32.LIB +// This library replaces the venerable rsa32(k).lib. The major changes are: +// +// - SymCrypt requires the caller to call a library initialization function +// before calling the various algorithm implementations. +// - SymCrypt requires the caller to specify the environment in which the library +// is running. +// - SymCrypt has a CHKed and FRE version for use in CHKed and FRE builds. +// - The API has been updated. The API is more consistent and has better support +// for 64-bit platforms (use of SIZE_T rather than UINT32 for lengths). +// - All algorithm implementations have been updated to reflect the +// latest cryptographic coding guidelines. Several security weaknesses +// in the RSA32.lib code have been fixed. +// - Code has been optimized for the newer CPUs. +// This includes support for AES-NI, PCLMULQDQ, AVX2, etc. +// Most algorithms are faster, especially the recommended algorithms. +// Some legacy algorithms are somewhat slower due to removal of assembler support. +// Note: performance on older CPUs, like the Pentium 4, is reduced in some places. +// - Code and data now go into their default segments. +// RSA32 has a kernel-mode version where the code and data go into +// special segments. 
This allows the crypto code to be made pageable or +// nonpageable separate from the rest of the executable. This feature is +// error-prone, and not widely used. Furthermore, it switches on a per-lib +// basis, rather than a per-functionality basis, which is the wrong granularity. +// - Added native support for HMAC-SHA256 and HMAC-SHA512. +// - Support for parallel hashing, improves throughput up to 500%. +// - SymCrypt does not support binary copying of internal state information, because +// it imposes restrictions on what the library can do. +// Thus, you may NOT do a memcpy or remote copy on any SymCrypt data structure. +// SymCrypt provides copy functions where necessary, if you need others please ask. +// + +// +// Error codes +// +// This is a high-performance library with a minimum of error checking. Most +// routines do not perform any error checking at all. +// Some routines perform internal consistency checks and will cause a fatal +// error if the library is used incorrectly. +// +// In a few cases routines return an error code when they are called incorrectly. +// Mostly this is for key expansion routines which return an error code when the key +// size is wrong. This allows a higher-level library to be agnostic as to the proper +// key sizes for an algorithm and use the SymCrypt library to detect key size errors. +// +// For performance reasons this library avoids per-message error codes wherever possible. +// +// As this library can be used in many different contexts---kernel mode, user mode, +// WinCE, Xbox, etc.---we don't use one of the standard error types but use our own. +// Callers should not depend on the integer value of any of these enums. +// +// Error codes will signal the cause of the error, but callers should not rely on the +// exact symbolic error code returned. Especially in situations where multiple errors +// occur at once (e.g. 
multiple invalid parameters) the exact error symbol returned +// could change between versions of the library. +// + +#ifndef _Return_type_success_ +#define _Return_type_success_(expr) +#endif + +typedef _Return_type_success_( return == SYMCRYPT_NO_ERROR ) enum { + SYMCRYPT_NO_ERROR = 0, + SYMCRYPT_UNUSED = 0x8000, // Start our error codes here so they're easier to distinguish + SYMCRYPT_WRONG_KEY_SIZE, + SYMCRYPT_WRONG_BLOCK_SIZE, + SYMCRYPT_WRONG_DATA_SIZE, + SYMCRYPT_WRONG_NONCE_SIZE, + SYMCRYPT_WRONG_TAG_SIZE, + SYMCRYPT_WRONG_ITERATION_COUNT, + SYMCRYPT_AUTHENTICATION_FAILURE, + SYMCRYPT_EXTERNAL_FAILURE, + SYMCRYPT_FIPS_FAILURE, + SYMCRYPT_HARDWARE_FAILURE, + SYMCRYPT_NOT_IMPLEMENTED, + SYMCRYPT_INVALID_BLOB, + SYMCRYPT_BUFFER_TOO_SMALL, + SYMCRYPT_INVALID_ARGUMENT, + SYMCRYPT_MEMORY_ALLOCATION_FAILURE, + SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE, + SYMCRYPT_INCOMPATIBLE_FORMAT, + SYMCRYPT_VALUE_TOO_LARGE, + SYMCRYPT_SESSION_REPLAY_FAILURE, + SYMCRYPT_HBS_NO_OTS_KEYS_LEFT, + SYMCRYPT_HBS_PUBLIC_ROOT_MISMATCH, +} SYMCRYPT_ERROR; + +// SYMCRYPT_ECURVE_TYPE needs to be completely defined before including +// symcrypt_internal.h because it's a member of another type in there. +typedef enum _SYMCRYPT_ECURVE_TYPE { + SYMCRYPT_ECURVE_TYPE_NULL = 0, + SYMCRYPT_ECURVE_TYPE_SHORT_WEIERSTRASS = 1, + SYMCRYPT_ECURVE_TYPE_TWISTED_EDWARDS = 2, + SYMCRYPT_ECURVE_TYPE_MONTGOMERY = 3, +} SYMCRYPT_ECURVE_TYPE; +// +// SYMCRYPT_ECURVE_TYPE is used to specify the type of the curve. +// + +// SYMCRYPT_DLGROUP_FIPS needs to be completely defined before including +// symcrypt_internal.h because it's a member of another type in there. + +//===================================================== +// DL group operations + +typedef enum _SYMCRYPT_DLGROUP_FIPS { + SYMCRYPT_DLGROUP_FIPS_NONE = 0, + SYMCRYPT_DLGROUP_FIPS_186_2 = 1, + SYMCRYPT_DLGROUP_FIPS_186_3 = 2, +} SYMCRYPT_DLGROUP_FIPS; +// +// Dlgroup enums for the generation and verification of the group parameters. 
+// These are used in: +// - SymCryptDlgroupGenerate function to specify the appropriate standard to +// be used. +// - SymCryptDlgroupSetValue function to verify that the input parameters were +// properly generated. +// + +typedef enum _SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE { + SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_NONE = 0, + SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_IKE_3526 = 1, + SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_TLS_7919 = 2, +} SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE; +#define SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_DEFAULT SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_TLS_7919 +// +// Dlgroup enums for the specification and verification of the named safe prime group parameters. +// These are used in: +// - SymCryptDlgroupGenerateSafePrime function to specify the appropriate group to +// be used. +// + +// +// The symcrypt_internal.h file contains information only relevant to the internals +// of the library, but they have to be exposed to the compiler of the caller. +// We put those in a separate file to make this file easier to read +// for users of the library. +// The details in the symcrypt_internal.h file can change at any time; +// users should only rely on the information in this header file. +// +#include "symcrypt_internal.h" + +// +// Useful macros +// +// A variety of useful macros. +// +// The load/store macros convert from integer types to an array of bytes and vice versa. +// LOAD<n>_* (p) loads a value of <n> bits from the byte pointer p. +// STORE<n>_* (p,v) stores the n-bit value v to byte pointer p. +// The macros can either do Most Significant Byte first (big-endian) or +// Least Significant Byte first. +// The actual definitions are in the symcrypt_internal.h file because they contain +// items that are not part of the stable public API of SymCrypt. 
+// + +#define SYMCRYPT_LOAD_LSBFIRST16( p ) SYMCRYPT_INTERNAL_LOAD_LSBFIRST16( p ) +#define SYMCRYPT_LOAD_LSBFIRST32( p ) SYMCRYPT_INTERNAL_LOAD_LSBFIRST32( p ) +#define SYMCRYPT_LOAD_LSBFIRST64( p ) SYMCRYPT_INTERNAL_LOAD_LSBFIRST64( p ) + +#define SYMCRYPT_LOAD_MSBFIRST16( p ) SYMCRYPT_INTERNAL_LOAD_MSBFIRST16( p ) +#define SYMCRYPT_LOAD_MSBFIRST32( p ) SYMCRYPT_INTERNAL_LOAD_MSBFIRST32( p ) +#define SYMCRYPT_LOAD_MSBFIRST64( p ) SYMCRYPT_INTERNAL_LOAD_MSBFIRST64( p ) + +#define SYMCRYPT_STORE_LSBFIRST16( p, v ) SYMCRYPT_INTERNAL_STORE_LSBFIRST16( p, v ) +#define SYMCRYPT_STORE_LSBFIRST32( p, v ) SYMCRYPT_INTERNAL_STORE_LSBFIRST32( p, v ) +#define SYMCRYPT_STORE_LSBFIRST64( p, v ) SYMCRYPT_INTERNAL_STORE_LSBFIRST64( p, v ) + +#define SYMCRYPT_STORE_MSBFIRST16( p, v ) SYMCRYPT_INTERNAL_STORE_MSBFIRST16( p, v ) +#define SYMCRYPT_STORE_MSBFIRST32( p, v ) SYMCRYPT_INTERNAL_STORE_MSBFIRST32( p, v ) +#define SYMCRYPT_STORE_MSBFIRST64( p, v ) SYMCRYPT_INTERNAL_STORE_MSBFIRST64( p, v ) + +// +// Convert between UINT32/UINT64 and variable-sized byte buffers +// +// The load functions take any size input array, and will return an error if the value +// encoded in the array exceeds the range of the target type (UINT32 or UINT64). +// The store functions will return an error if the destination buffer is too small +// to encode the actual value passed. +// An empty buffer (length = 0) encodes the value 0, and the value 0 can be encoded +// in the empty buffer. +// These functions are not side-channel safe. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLoadLsbFirstUint32( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_ PUINT32 pDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLoadLsbFirstUint64( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_ PUINT64 pDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLoadMsbFirstUint32( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_ PUINT32 pDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLoadMsbFirstUint64( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_ PUINT64 pDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptStoreLsbFirstUint32( + UINT32 src, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptStoreLsbFirstUint64( + UINT64 src, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptStoreMsbFirstUint32( + UINT32 src, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptStoreMsbFirstUint64( + UINT64 src, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); + +// +// Functions to retrieve the bitsize/bytesize of UINT32/UINT64 values +// Note: the bitsize/bytesize of the value 0 is defined as 0. +// Some data formats don't allow empty encodings, so the caller +// should ensure they handle the 0-case properly. +// These functions are NOT side-channel safe. +// +UINT32 +SymCryptUint32Bitsize( UINT32 value ); + +UINT32 +SymCryptUint64Bitsize( UINT64 value ); + +UINT32 +SymCryptUint32Bytesize( UINT32 value ); + +UINT32 +SymCryptUint64Bytesize( UINT64 value ); + + +// +// FORCED MEMORY ACCESS +// +// These macros force a memory access. That is, they require that the memory +// read or write takes place, and do not allow the compiler to optimize the access +// away. This is useful for wiping memory even if the compiler knows the memory will not be used in future. 
+// +// The READ<n> macros read an n-bit value from a PBYTE and return a BYTE if n=8 and an UINT<n> otherwise. +// The WRITE<n> macros write a value to a PBYTE using the same types as the corresponding READ<n> +// +// These macros provide no other memory ordering requirements, so there are no acquire/release +// semantics, memory barriers, etc. +// + +#define SYMCRYPT_FORCE_READ8( _p ) SYMCRYPT_INTERNAL_FORCE_READ8( _p ) +#define SYMCRYPT_FORCE_READ16( _p ) SYMCRYPT_INTERNAL_FORCE_READ16( _p ) +#define SYMCRYPT_FORCE_READ32( _p ) SYMCRYPT_INTERNAL_FORCE_READ32( _p ) +#define SYMCRYPT_FORCE_READ64( _p ) SYMCRYPT_INTERNAL_FORCE_READ64( _p ) + +#define SYMCRYPT_FORCE_WRITE8( _p, _v ) SYMCRYPT_INTERNAL_FORCE_WRITE8( _p, _v ) +#define SYMCRYPT_FORCE_WRITE16( _p, _v ) SYMCRYPT_INTERNAL_FORCE_WRITE16( _p, _v ) +#define SYMCRYPT_FORCE_WRITE32( _p, _v ) SYMCRYPT_INTERNAL_FORCE_WRITE32( _p, _v ) +#define SYMCRYPT_FORCE_WRITE64( _p, _v ) SYMCRYPT_INTERNAL_FORCE_WRITE64( _p, _v ) + +//========================================================================== +// TYPE MODIFIERS +//========================================================================== +// +// The SymCrypt library uses the following type modifiers +// +// SYMCRYPT_CALL +// +// The calling-convention used by SymCrypt functions. +// Some platforms have multiple calling conventions which differ in the +// way arguments are passed and the stack is handled +// The SYMCRYPT_CALL type modifier selects the correct calling convention. +// The current implementation uses __fastcall on the x86 platform, which +// passes arguments in registers and is generally faster than the __stdcall +// calling convention. +// +// +// SYMCRYPT_ALIGN +// +// On platforms that support alignment declaration this macro expands to +// __declspec(align(<n>)) where <n> is platform-dependent. +// Many data types that SymCrypt defines are SYMCRYPT_ALIGNed. 
+// When allocating memory for any SymCrypt data type the caller +// has to ensure that the memory is aligned to the natural alignment for +// that platform. (e.g. 4 for x86, 16 for x64) +// Memory allocation functions typically return properly aligned memory blocks. +// The macro SYMCRYPT_ALIGN_VALUE contains the actual value of <n>. +// + +//========================================================================== +// LIBRARY MANAGEMENT +//========================================================================== +// +// SymCrypt runs in many different environments. Boot library, kernel, user mode, +// (for each of x86, amd64, arm), and possibly WinCE, Mobile, Zune, Xbox, etc. +// These different environments can have different requirements. +// +// Creating different libraries for each environment has huge testing and maintenance +// costs. Instead, the user of the library invokes a pre-defined macro in their own code +// that contains the necessary adaptations to that environment. +// Using a macro makes the selection static, which allows the compiler to optimize +// away a lot of the overhead. +// (e.g. if XMM register saving is not needed, the stub function declared by the macro +// will always succeed, and the compiler will inline it and optimize it away.) +// +// Warning: due to recent changes in the Visual Studio C runtime, we cannot test saving +// of the YMM registers in Windows user mode. Because we do not have a kernel mode test +// for saving/restoring the YMM registers, this functionality is currently not tested. +// Before using SymCrypt in Windows 7 kernel mode, additional kernel mode tests should be +// added to verify this functionality. +// + +// +// The following environment macros are available. Callers should invoke one of these +// in their own code. +// +// SYMCRYPT_ENVIRONMENT_WINDOWS_BOOTLIBRARY // only for the current OS release +// +// SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE_LEGACY // Use for any version of Windows.
+// SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE_WIN7_N_LATER // Only for Win7 and later +// SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE_WIN8_1_N_LATER // Only for WinBlue and later +// SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE_LATEST // use for latest OS +// +// SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_LEGACY // use for any version of Windows +// SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_WIN7_N_LATER // Only for Win7 and later (cannot use AVX2 instructions) +// SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_WIN8_1_N_LATER // Only for Win8.1 and later +// SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_LATEST // use for latest OS +// +// SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELDEBUGGER +// +// SYMCRYPT_ENVIRONMENT_LINUX_USERMODE // use for Linux +// +// SYMCRYPT_ENVIRONMENT_OPTEE_TA // use for OPTEE +// +// SYMCRYPT_ENVIRONMENT_GENERIC // use for all other situations +// + +VOID +SYMCRYPT_CALL +SymCryptInit(void); +// +// Initialize the static library. +// This function MUST be called before any other function in the library. +// It is not necessary to call this function when using the shared object library. +// +// This function does not perform the self tests in the library. +// Doing so would force the linking of all the algorithms in the library, +// which is obviously not desirable for applications that want to link in +// only one or two algorithms. +// If self tests are required (e.g. for FIPS certification) they have to be +// called separately for each algorithm. +// +// It is safe to call this function multiple times. +// The library initialization is done in the first call; subsequent calls are no-ops. +// +// If you get an 'undefined symbol' error on this function name, then you forgot +// to invoke one of the environment macros documented above.
+// + +VOID +SYMCRYPT_CALL +SymCryptModuleInit( + _In_ UINT32 api, + _In_ UINT32 minor); + +#define SYMCRYPT_MODULE_INIT() SymCryptModuleInit( SYMCRYPT_CODE_VERSION_API, SYMCRYPT_CODE_VERSION_MINOR ); +// +// Initialize the SymCrypt shared object module/dynamic-link library. This function verifies +// that the module version supports the version requested by the application. If the version +// is unsupported, a fatal error will occur. Rather than explicitly calling SymCryptModuleInit, +// the macro SYMCRYPT_MODULE_INIT should be used to call it with the correct arguments. +// + +//========================================================================== +// DATA MANIPULATION +//========================================================================== +// +// This library provides some data manipulation functions that commonly occur +// in cryptographic code. +// + +VOID +SYMCRYPT_CALL +SymCryptWipe( + _Out_writes_bytes_( cbData ) PVOID pbData, + SIZE_T cbData ); + +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptWipeKnownSize( + _Out_writes_bytes_( cbData ) PVOID pbData, + SIZE_T cbData ); + +// +// The SymCryptWipe and SymCryptWipeKnownSize functions wipe memory. +// They work for any size and any alignment. +// Wiping is faster on x86 and x64 if the data buffer is 16-aligned, +// and the size is a multiple of 16. +// +// The SymCryptWipe function is optimized for the case where the size of the buffer +// is not known at compile time. +// +// The SymCryptWipeKnownSize function is optimized for the case where the +// cbData parameter is a compile-time known value. +// +// The two functions are functionally equivalent, but there can be significant performance +// differences: +// - calling SymCryptWipeKnownSize when the size is not known at compile time incurs a +// code size penalty. +// - calling SymCryptWipeKnownSize when the size is not known at compile time and is sometimes <= 64 +// incurs a performance penalty.
+// (The code assumes that the compiler can optimize all the conditional jumps away. +// Conditional jumps can be very expensive if they are not predicted correctly.) +// - calling SymCryptWipe when the buffer is small and has a compile-time known size incurs +// a performance penalty. +// When in doubt, use SymCryptWipe. +// + +VOID +SYMCRYPT_CALL +SymCryptXorBytes( + _In_reads_( cbBytes ) PCBYTE pbSrc1, + _In_reads_( cbBytes ) PCBYTE pbSrc2, + _Out_writes_( cbBytes ) PBYTE pbResult, + SIZE_T cbBytes ); +// +// Xor two strings of bytes together. +// +// The result buffer can be the same as Src1 or Src2, or can be non-overlapping +// with the inputs. However, the result buffer may not partially overlap with +// one of the inputs. +// + +BOOLEAN +SYMCRYPT_CALL +SymCryptEqual( + _In_reads_( cbBytes ) PCBYTE pbSrc1, + _In_reads_( cbBytes ) PCBYTE pbSrc2, + SIZE_T cbBytes ); +// +// Compare two regions of memory and return TRUE if they are equal, FALSE otherwise. +// +// This function compares all the bytes without an early-out mechanism. +// An early-out implementation, such as memcmp, reveals through side channels +// the position of the first byte where the inputs differ, which leaks information. +// + + +//========================================================================== +// HASH FUNCTIONS +//========================================================================== +// +// All hash functions have a similar interface. For consistency we describe +// the generic parts of the interface once. +// Algorithm-specific comments are given with the API functions of each algorithm separately. +// +// For an algorithm called XXX the following functions, types, and constants are defined: +// +// +// SYMCRYPT_XXX_RESULT_SIZE +// +// A constant giving the size, in bytes, of the result of the hash function. +// +// +// SYMCRYPT_XXX_INPUT_BLOCK_SIZE +// +// A constant giving the natural input block size for the hash function. 
+// Most callers don't need to know this, but some uses, like the HMAC construction +// adapt to this size to improve efficiency. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxx( _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData, +// _Out_writes_( SYMCRYPT_XXX_RESULT_SIZE ) PBYTE pbResult ); +// +// Computes the hash value of the data buffer. +// If you have all the data to be hashed in a single buffer this is the simplest function to use. +// +// +// SYMCRYPT_XXX_STATE +// +// Type to store the intermediate state of a hash computation. +// This is an opaque type whose structure can change at will. +// It should only be used for transient computations in a single executable +// and not be stored or transferred to a different process. +// The pointer version is also defined (PSYMCRYPT_XXX_STATE) +// +// The SYMCRYPT_XXX_STATE structure contains the entire state of an ongoing +// hash computation. If you want to compute the hash on several strings that +// have the same prefix, the caller may hash the prefix first, then create +// multiple copies using the supplied state copy function, +// and continue hashing the different states with different postfix strings. +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxInit( _Out_ PSYMCRYPT_XXX_STATE pState ); +// +// Initialize a SYMCRYPT_XXX_STATE for subsequent use. +// +// The state encodes an ongoing hash computation and allows incremental +// computation of a hash function. +// At any point in time the state object encodes a state that is equivalent to +// the hash computation of a data string. +// This function can be called at any time and resets the state to correspond +// to the empty data string. +// The SymCryptXxxAppend function appends data to the data string +// encoded by the state. +// The SymCryptXxxResult function finalizes the computation and +// returns the actual hash result. 
+// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxAppend( _Inout_ PSYMCRYPT_XXX_STATE pState, +// _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData ); +// +// Provide more data to the ongoing hash computation specified by the state. +// The state must have been initialized by SymCryptXxxInit. +// This function can be called multiple times on the same state +// to append more data to the encoded data string. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxResult( +// _Inout_ PSYMCRYPT_XXX_STATE pState, +// _Out_writes_( SYMCRYPT_XXX_RESULT_SIZE )PBYTE pbResult ); +// +// Returns the hash of the data string encoded by the state. +// If the state was newly initialized this returns the hash of the empty string. +// If one or more SymCryptXxxAppend function calls were made on this state +// it returns the hash of the concatenation of all the data strings +// passed to SymCryptXxxAppend. +// +// The state is re-initialized and ready for re-use; you do not have to call +// SymCryptXxxInit on the state to start another fresh hash computation. +// The state is also wiped of any traces of old data to prevent accidental data leakage. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxStateCopy( _In_ PCSYMCRYPT_XXX_STATE pSrc, _Out_ PSYMCRYPT_XXX_STATE pDst ); +// +// Create a new copy of the state object. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxStateExport( +// _In_ PCSYMCRYPT_XXX_STATE pState, +// _Out_writes_bytes_( SYMCRYPT_XXX_STATE_EXPORT_SIZE ) PBYTE pbBlob ); +// +// Converts a hash state to an exported format that can be persisted and re-imported. +// The exported blob is compatible across CPU architectures, and across different +// versions of SymCrypt. +// +// pState must point to a valid initialized hash state. 
+// +// +// SYMCRYPT_ERROR +// SYMCRYPT_CALL +// SymCryptXxxStateImport( +// _Out_ PSYMCRYPT_XXX_STATE pState, +// _In_reads_bytes_( SYMCRYPT_XXX_STATE_EXPORT_SIZE) PCBYTE pbBlob ); +// +// Imports a hash state that was previously exported with SymCryptXxxStateExport. +// After this call, the effective state of *pState is identical to the effective +// state of *pState that was passed to the SymCryptXxxStateExport function which +// created this blob. +// +// This function returns an error if the blob is incorrectly formatted. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxSelftest(void); +// +// Perform a minimal self-test on the XXX algorithm. +// This function is designed to be used for achieving FIPS 140-2 compliance or +// to provide a simple self-test when an application starts. +// +// If an error is detected, a platform-specific fatal error action is taken. +// Callers do not need to handle any error conditions. +// +// +// +// +// There are also generic Hash functions that use a virtual table and work +// for any hash algorithm. +// Virtual table addresses that callers can use are supplied through a const-ptr-const definition. +// This supports an application switching the underlying implementation of one algorithm +// without the need to re-compile all the intermediate libraries in between. +// For example, you could use the same signature verification library with the fast hash implementation in one binary, +// and with a compact hash implementation in a second binary, without needing a different +// signature verification library. 
+// + +typedef enum _SYMCRYPT_HASH_ID +{ + SYMCRYPT_HASH_ID_NULL = 0, + SYMCRYPT_HASH_ID_MD2 = 1, + SYMCRYPT_HASH_ID_MD4 = 2, + SYMCRYPT_HASH_ID_MD5 = 3, + SYMCRYPT_HASH_ID_SHA1 = 4, + SYMCRYPT_HASH_ID_SHA224 = 5, + SYMCRYPT_HASH_ID_SHA256 = 6, + SYMCRYPT_HASH_ID_SHA384 = 7, + SYMCRYPT_HASH_ID_SHA512 = 8, + SYMCRYPT_HASH_ID_SHA512_224 = 9, + SYMCRYPT_HASH_ID_SHA512_256 = 10, + SYMCRYPT_HASH_ID_SHA3_224 = 11, + SYMCRYPT_HASH_ID_SHA3_256 = 12, + SYMCRYPT_HASH_ID_SHA3_384 = 13, + SYMCRYPT_HASH_ID_SHA3_512 = 14, + SYMCRYPT_HASH_ID_SHAKE128 = 15, + SYMCRYPT_HASH_ID_SHAKE256 = 16 +} SYMCRYPT_HASH_ID; + +PCSYMCRYPT_HASH +SYMCRYPT_CALL +SymCryptGetHashAlgorithm( SYMCRYPT_HASH_ID hashId ); +// +// Returns a pointer to the hash algorithm structure for the specified hash ID. +// Returns NULL if the hash ID is invalid. +// + +SIZE_T +SYMCRYPT_CALL +SymCryptHashResultSize( _In_ PCSYMCRYPT_HASH pHash ); + +SIZE_T +SYMCRYPT_CALL +SymCryptHashInputBlockSize( _In_ PCSYMCRYPT_HASH pHash ); + +SIZE_T +SYMCRYPT_CALL +SymCryptHashStateSize( _In_ PCSYMCRYPT_HASH pHash ); +// +// SymCryptHashStateSize +// +// Returns the size, in bytes, of the hash state for this hash algorithm. +// Note that the state must be SYMCRYPT_ALIGNed. +// Alternatively, the SYMCRYPT_HASH_STATE structure is large enough to contain +// any Symcrypt-implemented hash state, so sizeof( SYMCRYPT_HASH_STATE ) is always +// large enough to contain a hash state. +// + +VOID +SYMCRYPT_CALL +SymCryptHash( + _In_ PCSYMCRYPT_HASH pHash, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_MIN( cbResult, pHash->resultSize ) ) PBYTE pbResult, + SIZE_T cbResult ); +// +// SymCryptHash +// +// Compute a hash value using any hash function. 
+// The number of bytes written to the pbResult buffer is +// min( cbResult, SymCryptHashResultSize( pHash ) ) +// + +VOID +SYMCRYPT_CALL +SymCryptHashInit( + _In_ PCSYMCRYPT_HASH pHash, + _Out_writes_bytes_( pHash->stateSize ) PVOID pState ); + +VOID +SYMCRYPT_CALL +SymCryptHashAppend( + _In_ PCSYMCRYPT_HASH pHash, + _Inout_updates_bytes_( pHash->stateSize ) PVOID pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHashResult( + _In_ PCSYMCRYPT_HASH pHash, + _Inout_updates_bytes_( pHash->stateSize ) PVOID pState, + _Out_writes_( SYMCRYPT_MIN( cbResult, pHash->resultSize ) ) PBYTE pbResult, + SIZE_T cbResult ); +// +// SymCryptHashResult +// +// Finalizes the hash computation by calling the resultFunc member +// of pHash. +// The hash result is produced to an internal buffer and +// the number of bytes written to the pbResult buffer is +// min( cbResult, SymCryptHashResultSize( pHash ) ) + +VOID +SYMCRYPT_CALL +SymCryptHashStateCopy( + _In_ PCSYMCRYPT_HASH pHash, + _In_reads_(pHash->stateSize) PCVOID pSrc, + _Out_writes_(pHash->stateSize) PVOID pDst); +// +// SymCryptHashStateCopy +// +// Copies the hash state from pSrc to pDst. + +//////////////////////////////////////////////////////////////////////////// +// MD2 +// +// The MD2 hash algorithm per RFC1319. +// +// The MD2 hash function has not received widespread analysis and is very slow +// compared to contemporary algorithms. +// +// The SymCrypt implementation of MD2 uses table lookups which leads to a side-channel +// vulnerability. +// +// Per the Crypto SDL, any use of this algorithm in Microsoft code requires +// a Crypto board exemption. Whenever possible, please use SHA-256 or SHA-512. +// +// For details on this API see the description above about the generic hash function API.
+// + +#define SYMCRYPT_MD2_RESULT_SIZE (16) +#define SYMCRYPT_MD2_INPUT_BLOCK_SIZE (16) + +VOID +SYMCRYPT_CALL +SymCryptMd2( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_MD2_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptMd2Init( _Out_ PSYMCRYPT_MD2_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptMd2Append( + _Inout_ PSYMCRYPT_MD2_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptMd2Result( + _Inout_ PSYMCRYPT_MD2_STATE pState, + _Out_writes_( SYMCRYPT_MD2_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptMd2StateCopy( _In_ PCSYMCRYPT_MD2_STATE pSrc, _Out_ PSYMCRYPT_MD2_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptMd2StateExport( + _In_ PCSYMCRYPT_MD2_STATE pState, + _Out_writes_bytes_( SYMCRYPT_MD2_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMd2StateImport( + _Out_ PSYMCRYPT_MD2_STATE pState, + _In_reads_bytes_( SYMCRYPT_MD2_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptMd2Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptMd2Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// MD4 +// +// The MD4 hash algorithm per RFC1320. +// This implementation is limited to data strings that are in whole bytes. +// Odd bit length are not supported. +// +// The MD4 hash function has been badly broken and is not considered secure. +// Per the Crypto SDL, any use of this algorithm in Microsoft code requires +// a Crypto board exemption. Whenever possible, please use SHA-256 or SHA-512. +// +// For details on this API see the description above about the generic hash function API.
+// + +#define SYMCRYPT_MD4_RESULT_SIZE (16) +#define SYMCRYPT_MD4_INPUT_BLOCK_SIZE (64) + +VOID +SYMCRYPT_CALL +SymCryptMd4( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_MD4_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptMd4Init( _Out_ PSYMCRYPT_MD4_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptMd4Append( + _Inout_ PSYMCRYPT_MD4_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptMd4Result( + _Inout_ PSYMCRYPT_MD4_STATE pState, + _Out_writes_( SYMCRYPT_MD4_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptMd4StateCopy( _In_ PCSYMCRYPT_MD4_STATE pSrc, _Out_ PSYMCRYPT_MD4_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptMd4StateExport( + _In_ PCSYMCRYPT_MD4_STATE pState, + _Out_writes_bytes_( SYMCRYPT_MD4_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMd4StateImport( + _Out_ PSYMCRYPT_MD4_STATE pState, + _In_reads_bytes_( SYMCRYPT_MD4_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptMd4Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptMd4Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// MD5 +// +// The MD5 hash algorithm per RFC1321. +// This implementation is limited to data strings that are in whole bytes. +// Odd bit length are not supported. +// +// The MD5 hash function has been badly broken and is not considered secure. +// Per the Crypto SDL, any use of this algorithm in Microsoft code requires +// a Crypto board exemption. Whenever possible, please use SHA-256 or SHA-512. +// +// For details on this API see the description above about the generic hash function API.
+// + +#define SYMCRYPT_MD5_RESULT_SIZE (16) +#define SYMCRYPT_MD5_INPUT_BLOCK_SIZE (64) + +VOID +SYMCRYPT_CALL +SymCryptMd5( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_MD5_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptMd5Init( _Out_ PSYMCRYPT_MD5_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptMd5Append( + _Inout_ PSYMCRYPT_MD5_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptMd5Result( + _Inout_ PSYMCRYPT_MD5_STATE pState, + _Out_writes_( SYMCRYPT_MD5_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptMd5StateCopy( _In_ PCSYMCRYPT_MD5_STATE pSrc, _Out_ PSYMCRYPT_MD5_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptMd5StateExport( + _In_ PCSYMCRYPT_MD5_STATE pState, + _Out_writes_bytes_( SYMCRYPT_MD5_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMd5StateImport( + _Out_ PSYMCRYPT_MD5_STATE pState, + _In_reads_bytes_( SYMCRYPT_MD5_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptMd5Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptMd5Algorithm; + + +/////////////////////////////////////////////////////////////////////////////// +// SHA-1 +// +// The SHA-1 hash algorithm per FIPS 180-4. +// +// This implementation is limited to data strings that are in whole bytes. +// Odd bit length are not supported. +// +// The SHA-1 standard limits data inputs to a maximum of 2^61-1 bytes. +// This implementation supports larger inputs, and simply wraps the internal message +// length counter. Note that the security properties are unknown for +// such long messages, and their use is not recommended. +// +// The SHA-1 hash algorithm has been broken in a technical sense, and future +// attacks can only get better. +// This algorithm is not recommended for new applications and should only be used +// for backward compatibility. 
+// Per the Crypto SDL, new uses of this algorithm in Microsoft code require +// a Crypto board exemption. Whenever possible, please use SHA-256 or SHA-512. +// +// For details on this API see the description above about the generic hash function API. +// + +#define SYMCRYPT_SHA1_RESULT_SIZE (20) +#define SYMCRYPT_SHA1_INPUT_BLOCK_SIZE (64) + +VOID +SYMCRYPT_CALL +SymCryptSha1( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHA1_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha1Init( _Out_ PSYMCRYPT_SHA1_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptSha1Append( + _Inout_ PSYMCRYPT_SHA1_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptSha1Result( + _Inout_ PSYMCRYPT_SHA1_STATE pState, + _Out_writes_( SYMCRYPT_SHA1_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha1StateCopy( _In_ PCSYMCRYPT_SHA1_STATE pSrc, _Out_ PSYMCRYPT_SHA1_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptSha1StateExport( + _In_ PCSYMCRYPT_SHA1_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA1_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha1StateImport( + _Out_ PSYMCRYPT_SHA1_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA1_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptSha1Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha1Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// SHA-224 +// +// +// The SHA-224 hash algorithm per FIPS 180-4. +// This implementation is limited to data strings that are in whole bytes. +// Odd bit length are not supported. +// +// The SHA-224 standard limits data inputs to a maximum of 2^61-1 bytes. +// This implementation supports larger inputs, and simply wraps the internal message +// length counter. Note that the security properties are unknown for +// such long messages, and their use is not recommended. 
+// +// This implementation is meant for interoperability and is not recommended for use. +// +// For details on this API see the description above about the generic hash function API. +// + +#define SYMCRYPT_SHA224_RESULT_SIZE (28) +#define SYMCRYPT_SHA224_INPUT_BLOCK_SIZE (64) + +VOID +SYMCRYPT_CALL +SymCryptSha224( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHA224_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha224Init( _Out_ PSYMCRYPT_SHA224_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptSha224Append( + _Inout_ PSYMCRYPT_SHA224_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptSha224Result( + _Inout_ PSYMCRYPT_SHA224_STATE pState, + _Out_writes_( SYMCRYPT_SHA224_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha224StateCopy( _In_ PCSYMCRYPT_SHA224_STATE pSrc, _Out_ PSYMCRYPT_SHA224_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptSha224StateExport( + _In_ PCSYMCRYPT_SHA224_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA224_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha224StateImport( + _Out_ PSYMCRYPT_SHA224_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA224_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptSha224Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha224Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// SHA-256 +// +// +// The SHA-256 hash algorithm per FIPS 180-4. +// This implementation is limited to data strings that are in whole bytes. +// Odd bit length are not supported. +// +// The SHA-256 standard limits data inputs to a maximum of 2^61-1 bytes. +// This implementation supports larger inputs, and simply wraps the internal message +// length counter. Note that the security properties are unknown for +// such long messages, and their use is not recommended. 
+// +// For details on this API see the description above about the generic hash function API. +// + +#define SYMCRYPT_SHA256_RESULT_SIZE (32) +#define SYMCRYPT_SHA256_INPUT_BLOCK_SIZE (64) + +VOID +SYMCRYPT_CALL +SymCryptSha256( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHA256_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha256Init( _Out_ PSYMCRYPT_SHA256_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptSha256Append( + _Inout_ PSYMCRYPT_SHA256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptSha256Result( + _Inout_ PSYMCRYPT_SHA256_STATE pState, + _Out_writes_( SYMCRYPT_SHA256_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha256StateCopy( _In_ PCSYMCRYPT_SHA256_STATE pSrc, _Out_ PSYMCRYPT_SHA256_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptSha256StateExport( + _In_ PCSYMCRYPT_SHA256_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA256_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha256StateImport( + _Out_ PSYMCRYPT_SHA256_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA256_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptSha256Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha256Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// SHA-384 +// +// +// The SHA-384 hash algorithm per FIPS 180-4. +// This implementation is limited to data strings that are in whole bytes. +// Odd bit length are not supported. +// +// The SHA-384 standard limits data inputs to a maximum of 2^125-1 bytes. +// This implementation supports larger inputs, and simply wraps the internal message +// length counter. Note that the security properties are unknown for +// such long messages, and their use is not recommended. +// +// For details on this API see the description above about the generic hash function API. 
+// + +#define SYMCRYPT_SHA384_RESULT_SIZE (48) +#define SYMCRYPT_SHA384_INPUT_BLOCK_SIZE (128) + +VOID +SYMCRYPT_CALL +SymCryptSha384( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHA384_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha384Init( _Out_ PSYMCRYPT_SHA384_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptSha384Append( + _Inout_ PSYMCRYPT_SHA384_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptSha384Result( + _Inout_ PSYMCRYPT_SHA384_STATE pState, + _Out_writes_( SYMCRYPT_SHA384_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha384StateCopy( _In_ PCSYMCRYPT_SHA384_STATE pSrc, _Out_ PSYMCRYPT_SHA384_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptSha384StateExport( + _In_ PCSYMCRYPT_SHA384_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA384_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha384StateImport( + _Out_ PSYMCRYPT_SHA384_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA384_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptSha384Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha384Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// SHA-512 +// +// +// The SHA-512 hash algorithm per FIPS 180-4. +// This implementation is limited to data strings that are in whole bytes. +// Odd bit length are not supported. +// +// The SHA-512 standard limits data inputs to a maximum of 2^125-1 bytes. +// This implementation supports larger inputs, and simply wraps the internal message +// length counter. Note that the security properties are unknown for +// such long messages, and their use is not recommended. +// +// For details on this API see the description above about the generic hash function API. 
+// + +#define SYMCRYPT_SHA512_RESULT_SIZE (64) +#define SYMCRYPT_SHA512_INPUT_BLOCK_SIZE (128) + +VOID +SYMCRYPT_CALL +SymCryptSha512( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHA512_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha512Init( _Out_ PSYMCRYPT_SHA512_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptSha512Append( + _Inout_ PSYMCRYPT_SHA512_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptSha512Result( + _Inout_ PSYMCRYPT_SHA512_STATE pState, + _Out_writes_( SYMCRYPT_SHA512_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha512StateCopy( _In_ PCSYMCRYPT_SHA512_STATE pSrc, _Out_ PSYMCRYPT_SHA512_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptSha512StateExport( + _In_ PCSYMCRYPT_SHA512_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA512_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha512StateImport( + _Out_ PSYMCRYPT_SHA512_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA512_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptSha512Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha512Algorithm; + + +//////////////////////////////////////////////////////////////////////////// +// SHA-512/224 +// +// +// The SHA-512/224 hash algorithm per FIPS 180-4. +// This implementation is limited to data strings that are in whole bytes. +// Odd bit length are not supported. +// +// The SHA-512/224 standard limits data inputs to a maximum of 2^125-1 bytes. +// This implementation supports larger inputs, and simply wraps the internal message +// length counter. Note that the security properties are unknown for +// such long messages, and their use is not recommended. +// +// This implementation is meant for interoperability and is not recommended for use. +// +// For details on this API see the description above about the generic hash function API. 
+// + +#define SYMCRYPT_SHA512_224_RESULT_SIZE (28) +#define SYMCRYPT_SHA512_224_INPUT_BLOCK_SIZE (128) + +VOID +SYMCRYPT_CALL +SymCryptSha512_224( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHA512_224_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_224Init( _Out_ PSYMCRYPT_SHA512_224_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_224Append( + _Inout_ PSYMCRYPT_SHA512_224_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_224Result( + _Inout_ PSYMCRYPT_SHA512_224_STATE pState, + _Out_writes_( SYMCRYPT_SHA512_224_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_224StateCopy( _In_ PCSYMCRYPT_SHA512_224_STATE pSrc, _Out_ PSYMCRYPT_SHA512_224_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_224StateExport( + _In_ PCSYMCRYPT_SHA512_224_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA512_224_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha512_224StateImport( + _Out_ PSYMCRYPT_SHA512_224_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA512_224_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_224Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha512_224Algorithm; + + +//////////////////////////////////////////////////////////////////////////// +// SHA-512/256 +// +// +// The SHA-512/256 hash algorithm per FIPS 180-4. +// This implementation is limited to data strings that are in whole bytes. +// Odd bit length are not supported. +// +// The SHA-512/256 standard limits data inputs to a maximum of 2^125-1 bytes. +// This implementation supports larger inputs, and simply wraps the internal message +// length counter. Note that the security properties are unknown for +// such long messages, and their use is not recommended. +// +// This implementation is meant for interoperability and is not recommended for use. 
+// +// For details on this API see the description above about the generic hash function API. +// + +#define SYMCRYPT_SHA512_256_RESULT_SIZE (32) +#define SYMCRYPT_SHA512_256_INPUT_BLOCK_SIZE (128) + +VOID +SYMCRYPT_CALL +SymCryptSha512_256( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHA512_256_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_256Init( _Out_ PSYMCRYPT_SHA512_256_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_256Append( + _Inout_ PSYMCRYPT_SHA512_256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_256Result( + _Inout_ PSYMCRYPT_SHA512_256_STATE pState, + _Out_writes_( SYMCRYPT_SHA512_256_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_256StateCopy( _In_ PCSYMCRYPT_SHA512_256_STATE pSrc, _Out_ PSYMCRYPT_SHA512_256_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_256StateExport( + _In_ PCSYMCRYPT_SHA512_256_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA512_256_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha512_256StateImport( + _Out_ PSYMCRYPT_SHA512_256_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA512_256_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptSha512_256Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha512_256Algorithm; + + +//////////////////////////////////////////////////////////////////////////// +// SHA-3 +// +// The SHA-3 family of hash algorithms per FIPS 202. +// This implementation is limited to data strings that are in whole bytes. +// Odd bit length are not supported. +// +// SHA3-224 is meant for interoperability and is not recommended for use. 
+// +// SHA3-224(M) = KECCAK[448](M || 01, 224) +// SHA3-256(M) = KECCAK[512](M || 01, 256) +// SHA3-384(M) = KECCAK[768](M || 01, 384) +// SHA3-512(M) = KECCAK[1024](M || 01, 512) +// +// For details on this API see the description above about the generic hash function API. +// + + +// +// SHA-3-224 +// + +#define SYMCRYPT_SHA3_224_RESULT_SIZE (28) +#define SYMCRYPT_SHA3_224_INPUT_BLOCK_SIZE (144) + +VOID +SYMCRYPT_CALL +SymCryptSha3_224( + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_(SYMCRYPT_SHA3_224_RESULT_SIZE) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptSha3_224Init(_Out_ PSYMCRYPT_SHA3_224_STATE pState); + +VOID +SYMCRYPT_CALL +SymCryptSha3_224Append( + _Inout_ PSYMCRYPT_SHA3_224_STATE pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData); + +VOID +SYMCRYPT_CALL +SymCryptSha3_224Result( + _Inout_ PSYMCRYPT_SHA3_224_STATE pState, + _Out_writes_(SYMCRYPT_SHA3_224_RESULT_SIZE) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptSha3_224StateCopy(_In_ PCSYMCRYPT_SHA3_224_STATE pSrc, _Out_ PSYMCRYPT_SHA3_224_STATE pDst); + +VOID +SYMCRYPT_CALL +SymCryptSha3_224StateExport( + _In_ PCSYMCRYPT_SHA3_224_STATE pState, + _Out_writes_bytes_(SYMCRYPT_SHA3_224_STATE_EXPORT_SIZE) PBYTE pbBlob); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha3_224StateImport( + _Out_ PSYMCRYPT_SHA3_224_STATE pState, + _In_reads_bytes_(SYMCRYPT_SHA3_224_STATE_EXPORT_SIZE) PCBYTE pbBlob); + +VOID +SYMCRYPT_CALL +SymCryptSha3_224Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha3_224Algorithm; + + +// +// SHA-3-256 +// + +#define SYMCRYPT_SHA3_256_RESULT_SIZE (32) +#define SYMCRYPT_SHA3_256_INPUT_BLOCK_SIZE (136) + +VOID +SYMCRYPT_CALL +SymCryptSha3_256( + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_(SYMCRYPT_SHA3_256_RESULT_SIZE) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptSha3_256Init(_Out_ PSYMCRYPT_SHA3_256_STATE pState); + +VOID +SYMCRYPT_CALL +SymCryptSha3_256Append( + _Inout_ PSYMCRYPT_SHA3_256_STATE pState, + 
_In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData); + +VOID +SYMCRYPT_CALL +SymCryptSha3_256Result( + _Inout_ PSYMCRYPT_SHA3_256_STATE pState, + _Out_writes_(SYMCRYPT_SHA3_256_RESULT_SIZE) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptSha3_256StateCopy(_In_ PCSYMCRYPT_SHA3_256_STATE pSrc, _Out_ PSYMCRYPT_SHA3_256_STATE pDst); + +VOID +SYMCRYPT_CALL +SymCryptSha3_256StateExport( + _In_ PCSYMCRYPT_SHA3_256_STATE pState, + _Out_writes_bytes_(SYMCRYPT_SHA3_256_STATE_EXPORT_SIZE) PBYTE pbBlob); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha3_256StateImport( + _Out_ PSYMCRYPT_SHA3_256_STATE pState, + _In_reads_bytes_(SYMCRYPT_SHA3_256_STATE_EXPORT_SIZE) PCBYTE pbBlob); + +VOID +SYMCRYPT_CALL +SymCryptSha3_256Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha3_256Algorithm; + + +// +// SHA-3-384 +// + +#define SYMCRYPT_SHA3_384_RESULT_SIZE (48) +#define SYMCRYPT_SHA3_384_INPUT_BLOCK_SIZE (104) + +VOID +SYMCRYPT_CALL +SymCryptSha3_384( + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_(SYMCRYPT_SHA3_384_RESULT_SIZE) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptSha3_384Init(_Out_ PSYMCRYPT_SHA3_384_STATE pState); + +VOID +SYMCRYPT_CALL +SymCryptSha3_384Append( + _Inout_ PSYMCRYPT_SHA3_384_STATE pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData); + +VOID +SYMCRYPT_CALL +SymCryptSha3_384Result( + _Inout_ PSYMCRYPT_SHA3_384_STATE pState, + _Out_writes_(SYMCRYPT_SHA3_384_RESULT_SIZE) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptSha3_384StateCopy(_In_ PCSYMCRYPT_SHA3_384_STATE pSrc, _Out_ PSYMCRYPT_SHA3_384_STATE pDst); + +VOID +SYMCRYPT_CALL +SymCryptSha3_384StateExport( + _In_ PCSYMCRYPT_SHA3_384_STATE pState, + _Out_writes_bytes_(SYMCRYPT_SHA3_384_STATE_EXPORT_SIZE) PBYTE pbBlob); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha3_384StateImport( + _Out_ PSYMCRYPT_SHA3_384_STATE pState, + _In_reads_bytes_(SYMCRYPT_SHA3_384_STATE_EXPORT_SIZE) PCBYTE pbBlob); + +VOID +SYMCRYPT_CALL +SymCryptSha3_384Selftest(void); + +extern const 
PCSYMCRYPT_HASH SymCryptSha3_384Algorithm; + + +// +// SHA-3-512 +// + +#define SYMCRYPT_SHA3_512_RESULT_SIZE (64) +#define SYMCRYPT_SHA3_512_INPUT_BLOCK_SIZE (72) + +VOID +SYMCRYPT_CALL +SymCryptSha3_512( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHA3_512_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha3_512Init( _Out_ PSYMCRYPT_SHA3_512_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptSha3_512Append( + _Inout_ PSYMCRYPT_SHA3_512_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptSha3_512Result( + _Inout_ PSYMCRYPT_SHA3_512_STATE pState, + _Out_writes_( SYMCRYPT_SHA3_512_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptSha3_512StateCopy( _In_ PCSYMCRYPT_SHA3_512_STATE pSrc, _Out_ PSYMCRYPT_SHA3_512_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptSha3_512StateExport( + _In_ PCSYMCRYPT_SHA3_512_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA3_512_STATE_EXPORT_SIZE ) PBYTE pbBlob ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha3_512StateImport( + _Out_ PSYMCRYPT_SHA3_512_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA3_512_STATE_EXPORT_SIZE) PCBYTE pbBlob ); + +VOID +SYMCRYPT_CALL +SymCryptSha3_512Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptSha3_512Algorithm; + + +//========================================================================== +// Extendable-Output Functions (XOFs) +//========================================================================== +// +// XOFs are similar to hash functions except that the output can be arbitrary length. +// SHAKE128 and SHAKE256 are XOFs specified in FIPS 202. +// +// SHAKE128(M, d) = KECCAK[256] (M || 1111, d) +// SHAKE256(M, d) = KECCAK[512] (M || 1111, d) +// +// SHAKEs share the same Keccak state as the other Keccak based algorithms under +// the name SYMCRYPT_SHAKEXxx_STATE. +// +// Both SHAKE128 and SHAKE256 have default result sizes (32- and 64-bytes resp.) 
+// that allows them to be used as substitutes for hash functions with the Init-Append-Result +// pattern. +// +// Extract is a new type of function that does not exist in hash functions, which can +// be called multiple times to successively generate output from the state. Extract +// function also provides the caller with a flag to wipe the state when no further Extract +// calls will be made. If the caller does not know in advance whether an Extract call is +// the final one, wiping can be performed later with an Init call or an Extract call with +// zero bytes output. +// +// If Append is called after an Extract call which did not wipe the state (i.e., the state +// is still in 'extract' mode), Append will notice this and switch from 'extract' mode to +// 'append' mode by wiping and initializing the state. This Append call effectively appends +// data for a fresh computation, saving an additional call to wipe/initialize the state. +// +// +// SYMCRYPT_SHAKEXXX_RESULT_SIZE +// +// Default output size, used by the SymCryptShakeXxxResult function. +// +// SYMCRYPT_SHAKEXXX_INPUT_BLOCK_SIZE +// +// Rate for the Keccak permutation. +// +// VOID +// SYMCRYPT_CALL +// SymCryptShakeXxxDefault( +// _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData, +// _Out_writes_( SYMCRYPT_SHAKEXXX_RESULT_SIZE ) PBYTE pbResult); +// +// SHAKE single-call function that produces default output size defined by +// SYMCRYPT_SHAKEXXX_RESULT_SIZE. +// +// VOID +// SYMCRYPT_CALL +// SymCryptShakeXxx( +// _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData, +// _Out_writes_( cbResult ) PBYTE pbResult, +// SIZE_T cbResult); +// +// SHAKE single-call function that produces variable-length output specified +// by the cbResult parameter. +// +// VOID +// SYMCRYPT_CALL +// SymCryptShakeXxxInit( _Out_ PSYMCRYPT_XXX_STATE pState ); +// +// Initializes the SHAKE state. 
+// +// VOID +// SYMCRYPT_CALL +// SymCryptShakeXxxAppend( +// _Inout_ PSYMCRYPT_XXX_STATE pState, +// _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData ); +// +// Appends data to the SHAKE state. +// +// Append cannot be the first call to an uninitialized SHAKE state. All +// other uses independent of whether the state is in 'append' mode or 'extract' +// mode are well defined. If the state was previously in 'extract' mode, (i.e., after +// an Extract call with bWipe=FALSE) it wipes/resets the state and the data is +// appended to a fresh state. +// +// VOID +// SYMCRYPT_CALL +// SymCryptShakeXxxExtract( +// _Inout_ PSYMCRYPT_XXX_STATE pState, +// _Out_writes_(cbResult) PBYTE pbResult, +// SIZE_T cbResult, +// BOOLEAN bWipe); +// +// Generates output from the SHAKE state. +// +// Extract cannot be the first call to an uninitialized SHAKE state. All +// other uses independent of whether the state is in 'append' mode or 'extract' mode +// are well defined. +// +// If the state was in 'append' mode before the Extract call, Extract switches +// the state to 'extract' mode and generates the requested number of bytes from +// the state. Extract wipes/resets the state and transitions the state to 'append' +// mode if bWipe=TRUE, otherwise leaving the state in 'extract' mode, available for +// further extractions. +// +// VOID +// SYMCRYPT_CALL +// SymCryptShakeXxxResult( +// _Inout_ PSYMCRYPT_XXX_STATE pState, +// _Out_writes_(SYMCRYPT_SHAKEXXX_RESULT_SIZE) PBYTE pbResult ); +// +// Extracts SYMCRYPT_SHAKEXXX_RESULT_SIZE bytes from the state and wipes/resets +// it for a new computation. +// +// Result cannot be called with an uninitialized state. All other uses are well +// defined. If it is called after an Extract call with bWipe=FALSE, it does the +// final extraction from the state for SYMCRYPT_SHAKEXXX_RESULT_SIZE bytes, +// effectively calling Extract with cbResult=SYMCRYPT_SHAKEXXX_RESULT_SIZE and +// bWipe=TRUE. 
+// +// VOID +// SYMCRYPT_CALL +// SymCryptShakeXxxStateCopy(_In_ PCSYMCRYPT_SHAKEXXX_STATE pSrc, _Out_ PSYMCRYPT_SHAKEXXX_STATE pDst); +// +// Create a new copy of the state object. +// +// VOID +// SYMCRYPT_CALL +// SymCryptShakeXxxSelftest(void); +// +// Perform a minimal self-test on the ShakeXxx algorithm. +// This function is designed to be used for achieving FIPS 140-2 compliance or +// to provide a simple self-test when an application starts. +// +// If an error is detected, a platform-specific fatal error action is taken. +// Callers do not need to handle any error conditions. + + +// +// SHAKE128 +// +#define SYMCRYPT_SHAKE128_RESULT_SIZE (32) +#define SYMCRYPT_SHAKE128_INPUT_BLOCK_SIZE (168) + +VOID +SYMCRYPT_CALL +SymCryptShake128Default( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHAKE128_RESULT_SIZE ) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptShake128( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptShake128Init( _Out_ PSYMCRYPT_SHAKE128_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptShake128Append( + _Inout_ PSYMCRYPT_SHAKE128_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptShake128Extract( + _Inout_ PSYMCRYPT_SHAKE128_STATE pState, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult, + BOOLEAN bWipe); + +VOID +SYMCRYPT_CALL +SymCryptShake128Result( + _Inout_ PSYMCRYPT_SHAKE128_STATE pState, + _Out_writes_(SYMCRYPT_SHAKE128_RESULT_SIZE) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptShake128StateCopy(_In_ PCSYMCRYPT_SHAKE128_STATE pSrc, _Out_ PSYMCRYPT_SHAKE128_STATE pDst); + +VOID +SYMCRYPT_CALL +SymCryptShake128Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptShake128HashAlgorithm; + +// +// SHAKE256 +// +#define SYMCRYPT_SHAKE256_RESULT_SIZE (64) +#define SYMCRYPT_SHAKE256_INPUT_BLOCK_SIZE (136) + +VOID +SYMCRYPT_CALL 
+SymCryptShake256Default( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_SHAKE256_RESULT_SIZE ) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptShake256( + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptShake256Init( _Out_ PSYMCRYPT_SHAKE256_STATE pState ); + +VOID +SYMCRYPT_CALL +SymCryptShake256Append( + _Inout_ PSYMCRYPT_SHAKE256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptShake256Extract( + _Inout_ PSYMCRYPT_SHAKE256_STATE pState, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult, + BOOLEAN bWipe); + +VOID +SYMCRYPT_CALL +SymCryptShake256Result( + _Inout_ PSYMCRYPT_SHAKE256_STATE pState, + _Out_writes_(SYMCRYPT_SHAKE256_RESULT_SIZE) PBYTE pbResult ); + + +VOID +SYMCRYPT_CALL +SymCryptShake256StateCopy(_In_ PCSYMCRYPT_SHAKE256_STATE pSrc, _Out_ PSYMCRYPT_SHAKE256_STATE pDst); + +VOID +SYMCRYPT_CALL +SymCryptShake256Selftest(void); + +extern const PCSYMCRYPT_HASH SymCryptShake256HashAlgorithm; + +//========================================================================== +// Customizable Extendable-Output Functions (XOFs) +//========================================================================== +// +// cSHAKE128 and cSHAKE256 are customizable SHAKE functions specified in NIST SP 800-185. 
+// +// When cSHAKE input strings N (function name string) and S (customization string) are +// both empty, cSHAKE is equivalent to SHAKE: +// +// cSHAKE128(X, L, "", "") = SHAKE128(X, L) +// cSHAKE256(X, L, "", "") = SHAKE256(X, L) +// +// If at least one of N and S is non-empty, cSHAKE is defined as follows: +// +// cSHAKE128(X, L, N, S) = KECCAK[256](bytepad(encode_string(N) || encode_string(S), 168) || X || 00, L) +// cSHAKE256(X, L, N, S) = KECCAK[512](bytepad(encode_string(N) || encode_string(S), 136) || X || 00, L) +// +// The following functions are equivalent to their SHAKE counterparts. +// SymCryptCShakeXxxExtract with bWipe=TRUE and SymCryptCShakeXxxResult functions reset +// the cSHAKE state to an empty SHAKE state after generating output. This behavior is +// equivalent to calling SymCryptCShakeXxxInit with empty input strings. +// +// SymCryptCShakeXxxAppend +// SymCryptCShakeXxxExtract +// SymCryptCShakeXxxResult +// +// Calling SymCryptCShakeXxxAppend when cSHAKE state is in 'extract' mode results +// in the same behavior described above: the state is wiped and initialized with +// empty input strings, after which the data is appended to the empty state. This +// converts the state to a SHAKE state since cSHAKE with empty input strings is +// equivalent to SHAKE. This is a consequence of not being able to store the input +// strings to cSHAKE and re-initialize it with them. Thus, if multiple cSHAKE +// computations with the same input strings are to be carried out, cSHAKE state must +// be initialized with the input strings each time. 
+// +// The following functions differ from the SHAKE by the introduction of customization +// strings: +// +// VOID +// SYMCRYPT_CALL +// SymCryptCShakeXxx( +// _In_reads_( cbFunctionNameString ) PCBYTE pbFunctionNameString, +// SIZE_T cbFunctionNameString, +// _In_reads_( cbCustomizationString ) PCBYTE pbCustomizationString, +// SIZE_T cbCustomizationString, +// _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData, +// _Out_writes_( cbResult ) PBYTE pbResult, +// SIZE_T cbResult); +// +// Single-call cSHAKE computation. +// +// VOID +// SYMCRYPT_CALL +// SymCryptCShakeXxxInit( +// _Out_ PSYMCRYPT_CSHAKEXXX_STATE pState, +// _In_reads_( cbFunctionNameString ) PCBYTE pbFunctionNameString, +// SIZE_T cbFunctionNameString, +// _In_reads_( cbCustomizationString ) PCBYTE pbCustomizationString, +// SIZE_T cbCustomizationString); +// +// Initializes the cSHAKE state with the provided input strings. If both of +// the input strings are empty, the call is equivalent to SymCryptShakeXxxInit, +// otherwise the input strings will be encoded and appended to the state. 
+ + +// +// cSHAKE128 +// +#define SYMCRYPT_CSHAKE128_RESULT_SIZE SYMCRYPT_SHAKE128_RESULT_SIZE +#define SYMCRYPT_CSHAKE128_INPUT_BLOCK_SIZE SYMCRYPT_SHAKE128_INPUT_BLOCK_SIZE + +VOID +SYMCRYPT_CALL +SymCryptCShake128( + _In_reads_( cbFunctionNameString ) PCBYTE pbFunctionNameString, + SIZE_T cbFunctionNameString, + _In_reads_( cbCustomizationString ) PCBYTE pbCustomizationString, + SIZE_T cbCustomizationString, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptCShake128Init( + _Out_ PSYMCRYPT_CSHAKE128_STATE pState, + _In_reads_( cbFunctionNameString ) PCBYTE pbFunctionNameString, + SIZE_T cbFunctionNameString, + _In_reads_( cbCustomizationString ) PCBYTE pbCustomizationString, + SIZE_T cbCustomizationString); + +VOID +SYMCRYPT_CALL +SymCryptCShake128Append( + _Inout_ PSYMCRYPT_CSHAKE128_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptCShake128Extract( + _Inout_ PSYMCRYPT_CSHAKE128_STATE pState, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult, + BOOLEAN bWipe); + +VOID +SYMCRYPT_CALL +SymCryptCShake128Result( + _Inout_ PSYMCRYPT_CSHAKE128_STATE pState, + _Out_writes_( SYMCRYPT_CSHAKE128_RESULT_SIZE ) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptCShake128StateCopy(_In_ PCSYMCRYPT_CSHAKE128_STATE pSrc, _Out_ PSYMCRYPT_CSHAKE128_STATE pDst); + +VOID +SYMCRYPT_CALL +SymCryptCShake128Selftest(void); + + +// +// cSHAKE256 +// +#define SYMCRYPT_CSHAKE256_RESULT_SIZE SYMCRYPT_SHAKE256_RESULT_SIZE +#define SYMCRYPT_CSHAKE256_INPUT_BLOCK_SIZE SYMCRYPT_SHAKE256_INPUT_BLOCK_SIZE + +VOID +SYMCRYPT_CALL +SymCryptCShake256( + _In_reads_( cbFunctionNameString ) PCBYTE pbFunctionNameString, + SIZE_T cbFunctionNameString, + _In_reads_( cbCustomizationString ) PCBYTE pbCustomizationString, + SIZE_T cbCustomizationString, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( cbResult ) PBYTE 
pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptCShake256Init( + _Out_ PSYMCRYPT_CSHAKE256_STATE pState, + _In_reads_( cbFunctionNameString ) PCBYTE pbFunctionNameString, + SIZE_T cbFunctionNameString, + _In_reads_( cbCustomizationString ) PCBYTE pbCustomizationString, + SIZE_T cbCustomizationString); + +VOID +SYMCRYPT_CALL +SymCryptCShake256Append( + _Inout_ PSYMCRYPT_CSHAKE256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptCShake256Extract( + _Inout_ PSYMCRYPT_CSHAKE256_STATE pState, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult, + BOOLEAN bWipe); + +VOID +SYMCRYPT_CALL +SymCryptCShake256Result( + _Inout_ PSYMCRYPT_CSHAKE256_STATE pState, + _Out_writes_( SYMCRYPT_CSHAKE256_RESULT_SIZE ) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptCShake256StateCopy(_In_ PCSYMCRYPT_CSHAKE256_STATE pSrc, _Out_ PSYMCRYPT_CSHAKE256_STATE pDst); + +VOID +SYMCRYPT_CALL +SymCryptCShake256Selftest(void); + + + +//========================================================================== +// PARALLELISED HASH FUNCTIONS +//========================================================================== +// +// On some platforms it is possible to parallelize the hash function +// computation to achieve a higher throughput. +// The parallel hash APIs support this. +// The parallel implementation tries to perform the computations as efficiently +// as possible. Applications that have many hashes to compute can always call these +// functions; the library will optimize the computation to the current situation. +// For example, if only a single hash is computed using these APIs, the +// single-hash version is used to achieve full single-hash speed. +// On platforms that do not support parallel hash implementations, these functions +// are still available, and will implement the parallel hashing by computing the +// hashes one at a time. 
+// +// +// SYMCRYPT_PARALLEL_XXX_MIN_PARALLELISM +// +// Compile-time constant, but can vary per platform. +// Minimum number of parallel computations at which +// the parallel implementation is faster on at least some CPU versions. +// Applications can safely ask for parallel computations with fewer hashes, +// but there will be no speed gain. +// +// SYMCRYPT_PARALLEL_XXX_MAX_PARALLELISM +// +// Maximum parallelism that the library uses internally on at least one +// CPU version of this architecture. +// If all hash computations are the same length, then there is no significant +// benefit to providing more than this number of hash requests in parallel. +// However, if the hash computations are of different lengths then the library +// overlaps various hash computations and still gains efficiency when the +// number of parallel hash computations increases past this bound. +// Note that the internal parallelism that can be used might depend +// on the CPU features available, so this value is only an upper bound. +// We recommend that callers provide as much parallelism as practical, +// and let the library perform the optimal sequence of computations. +// +// SYMCRYPT_HASH_OPERATION_TYPE +// +// An enum that specifies which operation is to be performed in a command +// structure passed to a parallel hash operations function. +// Defined values: +// SYMCRYPT_HASH_OPERATION_APPEND; +// SYMCRYPT_HASH_OPERATION_RESULT; +// +// SYMCRYPT_PARALLEL_HASH_OPERATION +// +// Structure that contains a command to be performed on a single item in a +// parallel hash state array. Visible fields are: +// +// SIZE_T iHash; // index of hash object into the state array +// SYMCRYPT_HASH_OPERATION_TYPE hashOperation; // operation to be performed +// PBYTE pbBuffer; // data to be hashed, or result buffer +// SIZE_T cbBuffer; +// +// There might be other fields in this structure that the caller should not use or assume anything about. 
+// +// SymCryptParallelXxxInit( +// _Out_writes_( nStates ) PSYMCRYPT_XXX_STATE pStates, +// SIZE_T nStates ); +// Initialize an array of hash states. +// The elements of the array are normal hash states, and they can be +// manipulated individually using the standard functions for the hash +// algorithm. +// +// Functionally equivalent to: +// for( i=0; i<nStates; i++ ) { +// SymCryptXxxInit( &pStates[i] ); +// } +// +// It is not necessary to use this function to initialize a state array; +// the normal initialization function can also be used, but this function might +// be faster. +// +// SymCryptParallelXxxProcess( +// _Inout_updates_( nStates ) PSYMCRYPT_XXX_STATE pStates, +// SIZE_T nStates, +// _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, +// SIZE_T nOperations, +// _Out_writes_( cbScratch ) PBYTE pbScratch, +// SIZE_T cbScratch ); +// +// Perform optionally parallel processing of hashes. +// This is functionally equivalent to iterating over the pOperations array in order, +// and executing the command in each PARALLEL_HASH_OPERATION one at a time. +// For each command: +// iHash Which hash state this operation applies to; must be < nStates. +// hashOperation Specifies whether this is an append or result operation. +// pbBuffer The buffer that contains the data to be hashed, or that will receive the result. +// cbBuffer The size of pbBuffer. (Must be equal to the hash algorithm result size for RESULT operations.) +// As the SAL annotations document, the pOperations array is updated by this function, and therefore +// it cannot be in read-only memory. +// The updates modify only the internal scratch space that is reserved +// in the SYMCRYPT_PARALLEL_HASH_OPERATION structure; none of the documented fields +// (iHash, hashOperation, pbBuffer, cbBuffer) are modified. +// The scratch fields are used purely within one call to this function; their value does not have to be +// maintained between function calls. 
The scratch fields do not have to be initialized by the caller +// of this function. +// THREAD SAFETY: as the pOperations array is updated, it CANNOT be shared between different threads. +// Obviously, the same is true of pStates and pbScratch. +// +// The pbScratch pointer provides a scratch buffer for the parallel processing function. +// This is used to organize the request and perform the functions in an optimal order for +// maximum parallelism, and for storing intermediate results that are too large +// to fit on the stack. The scratch buffer must be at least +// SYMCRYPT_PARALLEL_XXX_FIXED_SCRATCH + nStates * SYMCRYPT_PARALLEL_HASH_PER_STATE_SCRATCH +// bytes in size. +// +// For incremental hashing, we recommend that callers process data sizes that are +// a multiple of the SYMCRYPT_XXX_INPUT_BLOCK_SIZE. +// + + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256Init( + _Out_writes_( nStates ) PSYMCRYPT_SHA256_STATE pStates, + SIZE_T nStates ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelSha256Process( + _Inout_updates_( nStates ) PSYMCRYPT_SHA256_STATE pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + + +VOID +SYMCRYPT_CALL +SymCryptParallelSha384Init( + _Out_writes_( nStates ) PSYMCRYPT_SHA384_STATE pStates, + SIZE_T nStates ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelSha384Process( + _Inout_updates_( nStates ) PSYMCRYPT_SHA384_STATE pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + + +VOID +SYMCRYPT_CALL +SymCryptParallelSha512Init( + _Out_writes_( nStates ) PSYMCRYPT_SHA512_STATE pStates, + SIZE_T nStates ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelSha512Process( + _Inout_updates_( nStates ) PSYMCRYPT_SHA512_STATE pStates, + SIZE_T nStates, +
_Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256Selftest(void); + +VOID +SYMCRYPT_CALL +SymCryptParallelSha384Selftest(void); + +VOID +SYMCRYPT_CALL +SymCryptParallelSha512Selftest(void); + + + +//========================================================================== +// MESSAGE AUTHENTICATION CODE (MAC) +//========================================================================== +// +// All MAC functions have a similar interface. For consistency we describe +// the generic parts of the interface once. +// Algorithm-specific comments are given with the API functions of each algorithm separately. +// +// For a MAC algorithm called XXX the following functions, types, and constants are defined: +// +// +// SYMCRYPT_XXX_RESULT_SIZE +// +// A constant giving the size, in bytes, of the result of the MAC function. +// Some applications use truncated MAC functions. These are not directly supported +// by this library. Applications will have to perform the truncation themselves. +// +// +// SYMCRYPT_XXX_INPUT_BLOCK_SIZE +// +// A constant giving the natural input block size for the MAC function. +// Most callers don't need to know this, but in some cases it can be useful +// for optimizations. +// +// +// SYMCRYPT_XXX_EXPANDED_KEY +// +// Type which contains a key with all the pre-computations performed. +// This is an opaque type whose structure can change at will. +// It should only be used for transient computations in a single executable +// and not be stored or transferred to a different environment. +// The pointer and const-pointer versions are also declared +// (PSYMCRYPT_XXX_EXPANDED_KEY and PCSYMCRYPT_XXX_EXPANDED_KEY). +// +// The EXPANDED_KEY structure contains keying material and should be wiped +// once it is no longer used. 
(See SymCryptWipe & SymCryptWipeKnownSize) +// +// Once a key has been expanded, multiple threads can simultaneously use the same expanded key +// object for different MAC computations that use the same key as the expanded key +// object does not change value. +// +// +// SYMCRYPT_ERROR +// SYMCRYPT_CALL +// SymCryptXxxExpandKey( _Out_ PSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _In_reads_(cbKey) PCBYTE pbKey, +// SIZE_T cbKey ); +// +// Prepare a key for future use by the Xxx algorithm. +// This function performs pre-computations on the key +// to speed up the actual MAC computations later, and stores the result as an expanded key. +// The expanded key must be kept unchanged until all MAC computations that use the key are finished. +// When the key is no longer needed the expanded key structure should be wiped. +// +// Different algorithms pose different requirements on the length of the key. +// If the key that is provided is of an unsupported length the SYMCRYPT_WRONG_KEY_SIZE error is returned. +// In this case the expanded key structure will not contain any keying material and does not have to be wiped. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxKeyCopy( _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pSrc, +// _Out_ PSYMCRYPT_XXX_EXPANDED_KEY pDst ); +// +// Create a copy of an expanded key. +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxx( _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData, +// _Out_writes_( SYMCRYPT_XXX_RESULT_SIZE ) PBYTE pbResult ); +// +// Computes the MAC value of the data buffer with a given key. +// If you have all the data to be MACed in a single buffer this is the simplest function to use. +// +// +// SYMCRYPT_XXX_STATE +// +// The state encodes an ongoing MAC computation and allows incremental +// computation of a MAC function. 
+// At any point in time the state encodes a state that is equivalent to +// the MAC computation of a data string X with the key specified during initialization of the state. +// The SymCryptXxxInit() function initializes a state. +// The SymCryptXxxAppend() function appends data to the data string X. +// The SymCryptXxxResult() function returns the final MAC result. +// +// The state is an opaque type whose structure can change at will. +// It should only be used for transient computations in a single executable +// and not be stored or transferred to a different environment. +// +// Once initialized using SymCryptXxxInit, the state contains sensitive keying information. +// The SymCryptXxxResult function wipes the sensitive information from the state. +// Callers can also wipe the structure themselves if it is no longer needed. +// +// The state can be duplicated using the SymCryptXxxStateCopy function. This supports +// applications that compute the MAC over a prefix and then duplicate the state to +// compute the MAC using multiple different continuations. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxStateCopy( +// _In_ PCSYMCRYPT_XXX_STATE pSrc, +// _In_opt_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _Out_ PSYMCRYPT_XXX_STATE pDst ); +// +// Create a copy of the pSrc state in pDst. If pExpandedKey is NULL, the pDst state +// uses the same expanded key as the pSrc state did. If pExpandedKey is not NULL, +// it must point to an expanded key that contains the same key material as the key +// used by pSrc. (For example, a copy of the expanded key that pSrc uses.) +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxInit( _Out_ PSYMCRYPT_XXX_STATE pState, +// _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey); +// +// Initialize a SYMCRYPT_XXX_STATE for subsequent use with the provided key. +// +// This function can be called at any time and resets the state to correspond +// to the empty data string with the newly specified key. 
+// The SymCryptXxxAppend function appends data to the data string +// encoded by the state. +// The SymCryptXxxResult function finalizes the computation and +// returns the actual MAC result. +// +// This function typically stores a pointer to the expanded key in the state. +// The expanded key must remain unchanged in +// memory until the SYMCRYPT_XXX_STATE structure is no longer used. +// +// After initialization the state contains sensitive keying materials, and should +// be wiped when the state is no longer used. The SymCryptXxxResult() function +// also wipes the state, so this is only a concern for aborted MAC computations. +// Note that SymCryptXxxResult() does not wipe the expanded key; callers are always +// responsible for wiping the expanded key. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxAppend( _Inout_ PSYMCRYPT_XXX_STATE pState, +// _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData ); +// +// Provide more data to the ongoing MAC computation specified by the state. +// The state must have been initialized by SymCryptXxxInit. +// This function can be called multiple times on the same state +// to append more data to the encoded data string. +// +// The SYMCRYPT_XXX_STATE structure contains the entire state of an ongoing +// MAC computation. If you want to MAC some data and then continue with +// multiple other strings you may create one or more copies of the state. +// (The expanded key must remain unchanged in memory until all copies of the state +// are no longer used.) +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxResult( +// _Inout_ PSYMCRYPT_XXX_STATE pState, +// _Out_writes_( SYMCRYPT_XXX_RESULT_SIZE ) PBYTE pbResult ); +// +// Returns the MAC result of the state. +// If the state was newly initialized this returns the MAC of the empty string +// using the key specified in the SymCryptXxxInit call. 
+// If one or more SymCryptXxxAppend function calls were made on this state +// it returns the MAC of the concatenation of all the data strings +// passed to SymCryptXxxAppend using the specified key. +// +// The state is wiped to remove any traces of sensitive data. +// To use the same state for another MAC computation you must call +// SymCryptXxxInit again to re-initialize the state. +// This behaviour is different from hash function states that are re-initialized for +// use by the Result routine. This difference is by design; re-initializing a hash +// state is a safe operation. Re-initializing a MAC state puts keying information +// in the state, and callers would have to wipe the MAC state explicitly. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxSelftest(void); +// +// Perform a minimal self-test on the XXX algorithm. +// This function is designed to be used for achieving FIPS 140-2 compliance or +// to provide a simple self-test when an application starts. +// +// If an error is detected, a platform-specific fatal error action is taken. +// Callers do not need to handle any error conditions. +// +// +// We also have the Generic HMAC API where the hash function to be used in the HMAC +// computation can be selected at runtime. 
+// + +typedef enum _SYMCRYPT_MAC_ID +{ + SYMCRYPT_MAC_ID_NULL = 0, + SYMCRYPT_MAC_ID_HMAC_MD5 = 1, + SYMCRYPT_MAC_ID_HMAC_SHA1 = 2, + SYMCRYPT_MAC_ID_HMAC_SHA224 = 3, + SYMCRYPT_MAC_ID_HMAC_SHA256 = 4, + SYMCRYPT_MAC_ID_HMAC_SHA384 = 5, + SYMCRYPT_MAC_ID_HMAC_SHA512 = 6, + SYMCRYPT_MAC_ID_HMAC_SHA512_224 = 7, + SYMCRYPT_MAC_ID_HMAC_SHA512_256 = 8, + SYMCRYPT_MAC_ID_HMAC_SHA3_224 = 9, + SYMCRYPT_MAC_ID_HMAC_SHA3_256 = 10, + SYMCRYPT_MAC_ID_HMAC_SHA3_384 = 11, + SYMCRYPT_MAC_ID_HMAC_SHA3_512 = 12, + SYMCRYPT_MAC_ID_AES_CMAC = 13, + SYMCRYPT_MAC_ID_KMAC_128 = 14, + SYMCRYPT_MAC_ID_KMAC_256 = 15 +} SYMCRYPT_MAC_ID; + +PCSYMCRYPT_MAC +SYMCRYPT_CALL +SymCryptGetMacAlgorithm( SYMCRYPT_MAC_ID macId ); +// +// Returns a pointer to the MAC algorithm structure for the specified MAC ID. +// Returns NULL if the MAC ID is invalid. +// + +// +// Generic HMAC API with parametrized hash function +// +VOID +SYMCRYPT_CALL +SymCryptHmacStateCopy( + _In_ PCSYMCRYPT_HMAC_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacKeyCopy( + _In_ PCSYMCRYPT_HMAC_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_EXPANDED_KEY pDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacExpandKey( + _In_ PCSYMCRYPT_HASH pHash, + _Out_ PSYMCRYPT_HMAC_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptHmacInit( + _Out_ PSYMCRYPT_HMAC_STATE pState, + _In_ PCSYMCRYPT_HMAC_EXPANDED_KEY pExpandedKey ); + +VOID +SYMCRYPT_CALL +SymCryptHmacAppend( + _Inout_ PSYMCRYPT_HMAC_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptHmacResult( + _Inout_ PSYMCRYPT_HMAC_STATE pState, + _Out_writes_( pState->pKey->pHash->resultSize ) PBYTE pbResult ); + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptHmac( + _In_ PCSYMCRYPT_HMAC_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + 
SIZE_T cbData, + _Out_writes_( pExpandedKey->pHash->resultSize ) PBYTE pbResult ); + + +//////////////////////////////////////////////////////////////////////////// +// HMAC-MD5 +// +// + +#define SYMCRYPT_HMAC_MD5_RESULT_SIZE SYMCRYPT_MD5_RESULT_SIZE +#define SYMCRYPT_HMAC_MD5_INPUT_BLOCK_SIZE SYMCRYPT_MD5_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacMd5ExpandKey( + _Out_ PSYMCRYPT_HMAC_MD5_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. +// + +VOID +SYMCRYPT_CALL +SymCryptHmacMd5KeyCopy( + _In_ PCSYMCRYPT_HMAC_MD5_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_MD5_EXPANDED_KEY pDst ); + + +VOID +SYMCRYPT_CALL +SymCryptHmacMd5( + _In_ PCSYMCRYPT_HMAC_MD5_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_MD5_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacMd5StateCopy( + _In_ PCSYMCRYPT_HMAC_MD5_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_MD5_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_MD5_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacMd5Init( + _Out_ PSYMCRYPT_HMAC_MD5_STATE pState, + _In_ PCSYMCRYPT_HMAC_MD5_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacMd5Append( + _Inout_ PSYMCRYPT_HMAC_MD5_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacMd5Result( + _Inout_ PSYMCRYPT_HMAC_MD5_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_MD5_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacMd5Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacMd5Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA-1 +// +// + +#define SYMCRYPT_HMAC_SHA1_RESULT_SIZE SYMCRYPT_SHA1_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA1_INPUT_BLOCK_SIZE SYMCRYPT_SHA1_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha1ExpandKey( + _Out_ 
PSYMCRYPT_HMAC_SHA1_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. +// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha1KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA1_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA1_EXPANDED_KEY pDst ); + + +VOID +SYMCRYPT_CALL +SymCryptHmacSha1( + _In_ PCSYMCRYPT_HMAC_SHA1_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA1_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha1StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA1_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA1_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA1_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha1Init( + _Out_ PSYMCRYPT_HMAC_SHA1_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA1_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha1Append( + _Inout_ PSYMCRYPT_HMAC_SHA1_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha1Result( + _Inout_ PSYMCRYPT_HMAC_SHA1_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA1_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha1Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha1Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA-224 +// +// This implementation is meant for interoperability and is not recommended for use. +// +// + +#define SYMCRYPT_HMAC_SHA224_RESULT_SIZE SYMCRYPT_SHA224_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA224_INPUT_BLOCK_SIZE SYMCRYPT_SHA224_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha224ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA224_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha224KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA224_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA224_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha224( + _In_ PCSYMCRYPT_HMAC_SHA224_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA224_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha224StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA224_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA224_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA224_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha224Init( + _Out_ PSYMCRYPT_HMAC_SHA224_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA224_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha224Append( + _Inout_ PSYMCRYPT_HMAC_SHA224_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha224Result( + _Inout_ PSYMCRYPT_HMAC_SHA224_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA224_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha224Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha224Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA-256 +// +// + +#define SYMCRYPT_HMAC_SHA256_RESULT_SIZE SYMCRYPT_SHA256_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA256_INPUT_BLOCK_SIZE SYMCRYPT_SHA256_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha256ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA256_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha256KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA256_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA256_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha256( + _In_ PCSYMCRYPT_HMAC_SHA256_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA256_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha256StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA256_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA256_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA256_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha256Init( + _Out_ PSYMCRYPT_HMAC_SHA256_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA256_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha256Append( + _Inout_ PSYMCRYPT_HMAC_SHA256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha256Result( + _Inout_ PSYMCRYPT_HMAC_SHA256_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA256_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha256Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha256Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA-384 +// +// + +#define SYMCRYPT_HMAC_SHA384_RESULT_SIZE SYMCRYPT_SHA384_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA384_INPUT_BLOCK_SIZE SYMCRYPT_SHA384_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha384ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA384_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha384KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA384_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA384_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha384( + _In_ PCSYMCRYPT_HMAC_SHA384_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA384_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha384StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA384_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA384_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA384_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha384Init( + _Out_ PSYMCRYPT_HMAC_SHA384_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA384_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha384Append( + _Inout_ PSYMCRYPT_HMAC_SHA384_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha384Result( + _Inout_ PSYMCRYPT_HMAC_SHA384_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA384_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha384Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha384Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA-512 +// +// + +#define SYMCRYPT_HMAC_SHA512_RESULT_SIZE SYMCRYPT_SHA512_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA512_INPUT_BLOCK_SIZE SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha512ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA512_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA512_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA512_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512( + _In_ PCSYMCRYPT_HMAC_SHA512_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA512_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA512_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA512_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA512_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512Init( + _Out_ PSYMCRYPT_HMAC_SHA512_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA512_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512Append( + _Inout_ PSYMCRYPT_HMAC_SHA512_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512Result( + _Inout_ PSYMCRYPT_HMAC_SHA512_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA512_RESULT_SIZE )PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha512Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA-512_224 +// +// This implementation is meant for interoperability and is not recommended for use. +// +// + +#define SYMCRYPT_HMAC_SHA512_224_RESULT_SIZE SYMCRYPT_SHA512_224_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA512_224_INPUT_BLOCK_SIZE SYMCRYPT_SHA512_224_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha512_224ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_224KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_224( + _In_ PCSYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA512_224_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_224StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA512_224_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA512_224_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_224Init( + _Out_ PSYMCRYPT_HMAC_SHA512_224_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_224Append( + _Inout_ PSYMCRYPT_HMAC_SHA512_224_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_224Result( + _Inout_ PSYMCRYPT_HMAC_SHA512_224_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA512_224_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_224Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha512_224Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA-512_256 +// +// This implementation is meant for interoperability and is not recommended for use. +// +// + +#define SYMCRYPT_HMAC_SHA512_256_RESULT_SIZE SYMCRYPT_SHA512_256_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA512_256_INPUT_BLOCK_SIZE SYMCRYPT_SHA512_256_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha512_256ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_256KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_256( + _In_ PCSYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA512_256_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_256StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA512_256_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA512_256_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_256Init( + _Out_ PSYMCRYPT_HMAC_SHA512_256_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_256Append( + _Inout_ PSYMCRYPT_HMAC_SHA512_256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_256Result( + _Inout_ PSYMCRYPT_HMAC_SHA512_256_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA512_256_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_256Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha512_256Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA3-224 +// +// This implementation is meant for interoperability and is not recommended for use. +// +// + +#define SYMCRYPT_HMAC_SHA3_224_RESULT_SIZE SYMCRYPT_SHA3_224_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA3_224_INPUT_BLOCK_SIZE SYMCRYPT_SHA3_224_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha3_224ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224( + _In_ PCSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA3_224_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_224_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA3_224_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224Init( + _Out_ PSYMCRYPT_HMAC_SHA3_224_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224Append( + _Inout_ PSYMCRYPT_HMAC_SHA3_224_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224Result( + _Inout_ PSYMCRYPT_HMAC_SHA3_224_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA3_224_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha3_224Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA3-256 +// +// + +#define SYMCRYPT_HMAC_SHA3_256_RESULT_SIZE SYMCRYPT_SHA3_256_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA3_256_INPUT_BLOCK_SIZE SYMCRYPT_SHA3_256_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha3_256ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256( + _In_ PCSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA3_256_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_256_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA3_256_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256Init( + _Out_ PSYMCRYPT_HMAC_SHA3_256_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256Append( + _Inout_ PSYMCRYPT_HMAC_SHA3_256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256Result( + _Inout_ PSYMCRYPT_HMAC_SHA3_256_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA3_256_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha3_256Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA3-384 +// +// + +#define SYMCRYPT_HMAC_SHA3_384_RESULT_SIZE SYMCRYPT_SHA3_384_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA3_384_INPUT_BLOCK_SIZE SYMCRYPT_SHA3_384_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha3_384ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384( + _In_ PCSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA3_384_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_384_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA3_384_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384Init( + _Out_ PSYMCRYPT_HMAC_SHA3_384_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384Append( + _Inout_ PSYMCRYPT_HMAC_SHA3_384_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384Result( + _Inout_ PSYMCRYPT_HMAC_SHA3_384_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA3_384_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha3_384Algorithm; + +//////////////////////////////////////////////////////////////////////////// +// HMAC-SHA3-512 +// +// + +#define SYMCRYPT_HMAC_SHA3_512_RESULT_SIZE SYMCRYPT_SHA3_512_RESULT_SIZE +#define SYMCRYPT_HMAC_SHA3_512_INPUT_BLOCK_SIZE SYMCRYPT_SHA3_512_INPUT_BLOCK_SIZE + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha3_512ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Supports all key lengths; never returns an error. 
+// + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512( + _In_ PCSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA3_512_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_512_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA3_512_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512Init( + _Out_ PSYMCRYPT_HMAC_SHA3_512_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512Append( + _Inout_ PSYMCRYPT_HMAC_SHA3_512_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512Result( + _Inout_ PSYMCRYPT_HMAC_SHA3_512_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA3_512_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptHmacSha3_512Algorithm; + + +//////////////////////////////////////////////////////////////////////////// +// AES-CMAC +// +// This is the AES-CMAC algorithm per SP 800-38B & RFC 4493. +// It is also known as AES-OMAC1. 
+// + +#define SYMCRYPT_AES_CMAC_RESULT_SIZE (16) +#define SYMCRYPT_AES_CMAC_INPUT_BLOCK_SIZE (16) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesCmacExpandKey( + _Out_ PSYMCRYPT_AES_CMAC_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Key size must be a valid AES key (16, 24, or 32 bytes) +// + +VOID +SYMCRYPT_CALL +SymCryptAesCmacKeyCopy( + _In_ PCSYMCRYPT_AES_CMAC_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_AES_CMAC_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesCmac( + _In_ PSYMCRYPT_AES_CMAC_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_AES_CMAC_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptAesCmacStateCopy( + _In_ PCSYMCRYPT_AES_CMAC_STATE pSrc, + _In_opt_ PCSYMCRYPT_AES_CMAC_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_AES_CMAC_STATE pDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesCmacInit( + _Out_ PSYMCRYPT_AES_CMAC_STATE pState, + _In_ PCSYMCRYPT_AES_CMAC_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptAesCmacAppend( + _Inout_ PSYMCRYPT_AES_CMAC_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCmacResult( + _Inout_ PSYMCRYPT_AES_CMAC_STATE pState, + _Out_writes_( SYMCRYPT_AES_CMAC_RESULT_SIZE ) PBYTE pbResult ); + +VOID +SYMCRYPT_CALL +SymCryptAesCmacSelftest(void); + +extern const PCSYMCRYPT_MAC SymCryptAesCmacAlgorithm; + +//////////////////////////////////////////////////////////////////////////// +// KMAC +// +// Keccak Message Authentication Code (KMAC) is specified in NIST SP 800-185 +// and has two variants; KMAC128 and KMAC256, using cSHAKE128 and cSHAKE256 +// as the underlying functions, respectively. +// +// KMAC128(K, X, L, S) = cSHAKE128(bytepad(encode_string(K), 168) || X || right_encode(L), L, "KMAC", S) +// KMAC256(K, X, L, S) = cSHAKE256(bytepad(encode_string(K), 136) || X || right_encode(L), L, "KMAC", S) +// +// KMAC accepts a variable-size key. 
There's no restriction on the size of the key. +// +// KMAC differs from other MAC algorithms in SymCrypt by having two additional input +// parameters; a customization string and the length of the output. Output generated +// by KMAC also depends on the specified output length, i.e., outputs generated from +// two KMAC calls with the same key, message, customization string, but different output +// lengths will be unrelated/uncorrelated. This differs from SHAKE and cSHAKE where an +// output of size N bytes from the algorithm is a prefix of the output of size M bytes +// where N < M, when the inputs are the same. +// +// KMAC works in two modes; fixed-length mode and XOF mode. XOF variants are named KMACXOF128 +// and KMACXOF256. SymCrypt does not provide a separate KMACXOF API but supports them via +// the KMAC interface. +// +// KMACXOF128(K, X, L, S) = cSHAKE128(bytepad(encode_string(K), 168) || X || right_encode(0), L, "KMAC", S) +// KMACXOF256(K, X, L, S) = cSHAKE256(bytepad(encode_string(K), 136) || X || right_encode(0), L, "KMAC", S) +// +// KMAC output generation mode is determined by the output length parameter +// L in SP 800-185; if it is non-zero then KMAC works in fixed-length mode, otherwise (i.e., L=0) +// it works in XOF mode. +// - Fixed-length mode generates result with SymCryptKmacXxxResult or SymCryptKmacXxxResultEx. +// These functions wipe the state after generating output, thus can only be used +// once per initialized state. The result size is SYMCRYPT_KMACXXX_RESULT_SIZE +// for SymCryptKmacXxxResult and specified by the caller for SymCryptKmacXxxResultEx. +// - XOF mode can produce arbitrary length output. SymCryptKmacXxxExtract function puts KMAC +// state into XOF mode and all the successive calls that generate output from the KMAC state will be +// from the XOF mode.
SymCryptKmacXxxResult and SymCryptKmacXxxResultEx functions +// will also generate output in XOF mode IF they are called after a SymCryptKmacXxxExtract +// function with bWipe=FALSE (so that the state remains in XOF mode). Note that +// SymCryptKmacXxxResult and SymCryptKmacXxxResultEx functions wipe the state afterwards, +// thus KMAC state can only be used to generate output in XOF mode once with these two functions. +// +// SYMCRYPT_KMACXXX_RESULT_SIZE +// +// Default result size when KMAC is used with the existing MAC interface. +// Equal to twice the SYMCRYPT_KMACXXX_KEY_SIZE. +// +// SYMCRYPT_ERROR +// SYMCRYPT_CALL +// SymCryptKmacXxxExpandKey( +// _Out_ PSYMCRYPT_KMACXXX_EXPANDED_KEY pExpandedKey, +// _In_reads_bytes_( cbKey ) PCBYTE pbKey, +// SIZE_T cbKey); +// +// Performs key expansion with empty customization string. +// There's no restriction on the size of the key. +// +// SYMCRYPT_ERROR +// SYMCRYPT_CALL +// SymCryptKmacXxxExpandKeyEx( +// _Out_ PSYMCRYPT_KMACXXX_EXPANDED_KEY pExpandedKey, +// _In_reads_bytes_( cbKey ) PCBYTE pbKey, +// SIZE_T cbKey, +// _In_reads_bytes_( cbCustomizationString ) PCBYTE pbCustomizationString, +// SIZE_T cbCustomizationString); +// +// Performs key expansion for the provided key and customization string. +// There's no restriction on the size of the key. +// +// VOID +// SYMCRYPT_CALL +// SymCryptKmacXxx( +// _In_ PCSYMCRYPT_KMACXXX_EXPANDED_KEY pExpandedKey, +// _In_reads_bytes_( cbInput ) PCBYTE pbInput, +// SIZE_T cbInput, +// _Out_writes_bytes_( SYMCRYPT_KMACXXX_RESULT_SIZE ) PBYTE pbResult); +// +// Single-call KMAC computation for the given input producing default result +// size SYMCRYPT_KMACXXX_RESULT_SIZE. +// +// pExpandedKey must be initialized before the call. This function is equivalent +// to SymCryptKmacXxxEx with output size set to SYMCRYPT_KMACXXX_RESULT_SIZE. +// If a result size different than the default value is desired, SymCryptKmacXxxEx +// must be called.
+// +// VOID +// SYMCRYPT_CALL +// SymCryptKmacXxxEx( +// _In_ PCSYMCRYPT_KMACXXX_EXPANDED_KEY pExpandedKey, +// _In_reads_bytes_( cbInput ) PCBYTE pbInput, +// SIZE_T cbInput, +// _Out_writes_bytes_( cbResult ) PBYTE pbResult, +// SIZE_T cbResult); +// +// Single-call KMAC computation for the given input producing cbResult bytes result. +// pExpandedKey must be initialized before the call. +// +// VOID +// SYMCRYPT_CALL +// SymCryptKmacXxxInit( +// _Out_ PSYMCRYPT_KMACXXX_STATE pState, +// _In_ PCSYMCRYPT_KMACXXX_EXPANDED_KEY pExpandedKey); +// +// Initializes KMAC state for appending data for the provided key. Expanded +// key must be generated prior to this call. +// +// VOID +// SYMCRYPT_CALL +// SymCryptKmacXxxAppend( +// _Inout_ PSYMCRYPT_KMACXXX_STATE pState, +// _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData ); +// +// Appends data to the KMAC state. +// +// This function must only be called after SymCryptKmacXxxInit or SymCryptKmacXxxAppend. +// Calling SymCryptKmacXxxAppend after SymCryptKmacXxxExtract with bWipe=FALSE +// is not well-defined. KMAC state must be initialized with SymCryptKmacXxxInit before +// the first call to SymCryptKmacXxxAppend. +// +// VOID +// SYMCRYPT_CALL +// SymCryptKmacXxxExtract( +// _Inout_ PSYMCRYPT_KMACXXX_STATE pState, +// _Out_writes_( cbOutput ) PBYTE pbOutput, +// SIZE_T cbOutput, +// BOOLEAN bWipe); +// +// Generates KMAC output in XOF mode. +// +// Extract can only be called after an Init, Append or Extract call. +// The state is cleared if bWipe=TRUE, otherwise further Extract calls +// can be made to generate more output. +// +// VOID +// SYMCRYPT_CALL +// SymCryptKmacXxxResult( +// _Inout_ PSYMCRYPT_KMACXXX_STATE pState, +// _Out_writes_( SYMCRYPT_KMACXXX_RESULT_SIZE ) PBYTE pbResult); +// +// Produces SYMCRYPT_KMACXXX_RESULT_SIZE bytes of output from the KMAC state. +// The state is wiped on return. 
+// +// This function internally calls SymCryptKmacXxxResultEx with result size +// SYMCRYPT_KMACXXX_RESULT_SIZE. +// If Result is called in XOF mode (i.e., after an Extract with bWipe=FALSE), it +// performs a final extraction of SYMCRYPT_KMACXXX_RESULT_SIZE bytes in XOF mode +// and clears the state afterwards. +// The Result function does not re-initialize the state for a new computation like +// the Result functions of hash algorithms do. Computing a new MAC with the same key +// requires calling the SymCryptKmacXxxInit function first. +// +// VOID +// SYMCRYPT_CALL +// SymCryptKmacXxxResultEx( +// _Inout_ PSYMCRYPT_KMACXXX_STATE pState, +// _Out_writes_( cbResult ) PBYTE pbResult, +// SIZE_T cbResult); +// +// Produces cbResult bytes of output from the KMAC state. The state is +// wiped on return. +// +// If ResultEx is called in XOF mode (i.e., after an Extract with bWipe=FALSE), it +// performs a final extraction of cbResult bytes in XOF mode and clears the state +// afterwards. +// The ResultEx function does not re-initialize the state for a new computation like +// the Result functions of hash algorithms do. Computing a new MAC with the same key +// requires calling the SymCryptKmacXxxInit function first.
+// + + +// +// KMAC128 +// +#define SYMCRYPT_KMAC128_RESULT_SIZE SYMCRYPT_CSHAKE128_RESULT_SIZE +#define SYMCRYPT_KMAC128_INPUT_BLOCK_SIZE SYMCRYPT_CSHAKE128_INPUT_BLOCK_SIZE + +VOID +SYMCRYPT_CALL +SymCryptKmac128( + _In_ PCSYMCRYPT_KMAC128_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbInput ) PCBYTE pbInput, + SIZE_T cbInput, + _Out_writes_bytes_( SYMCRYPT_KMAC128_RESULT_SIZE ) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptKmac128Ex( + _In_ PCSYMCRYPT_KMAC128_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbInput ) PCBYTE pbInput, + SIZE_T cbInput, + _Out_writes_bytes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult); + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptKmac128ExpandKey( + _Out_ PSYMCRYPT_KMAC128_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptKmac128ExpandKeyEx( + _Out_ PSYMCRYPT_KMAC128_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_bytes_( cbCustomizationString ) PCBYTE pbCustomizationString, + SIZE_T cbCustomizationString); + +VOID +SYMCRYPT_CALL +SymCryptKmac128Init( + _Out_ PSYMCRYPT_KMAC128_STATE pState, + _In_ PCSYMCRYPT_KMAC128_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptKmac128Append( + _Inout_ PSYMCRYPT_KMAC128_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptKmac128Extract( + _Inout_ PSYMCRYPT_KMAC128_STATE pState, + _Out_writes_( cbOutput ) PBYTE pbOutput, + SIZE_T cbOutput, + BOOLEAN bWipe); + +VOID +SYMCRYPT_CALL +SymCryptKmac128Result( + _Inout_ PSYMCRYPT_KMAC128_STATE pState, + _Out_writes_( SYMCRYPT_KMAC128_RESULT_SIZE ) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptKmac128ResultEx( + _Inout_ PSYMCRYPT_KMAC128_STATE pState, + _Out_writes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptKmac128KeyCopy(_In_ PCSYMCRYPT_KMAC128_EXPANDED_KEY pSrc, _Out_ PSYMCRYPT_KMAC128_EXPANDED_KEY pDst); + +VOID 
+SYMCRYPT_CALL +SymCryptKmac128StateCopy(_In_ const SYMCRYPT_KMAC128_STATE* pSrc, _Out_ SYMCRYPT_KMAC128_STATE* pDst); + +VOID +SYMCRYPT_CALL +SymCryptKmac128Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptKmac128Algorithm; + +// +// KMAC256 +// +#define SYMCRYPT_KMAC256_RESULT_SIZE SYMCRYPT_CSHAKE256_RESULT_SIZE +#define SYMCRYPT_KMAC256_INPUT_BLOCK_SIZE SYMCRYPT_CSHAKE256_INPUT_BLOCK_SIZE + +VOID +SYMCRYPT_CALL +SymCryptKmac256( + _In_ PCSYMCRYPT_KMAC256_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbInput ) PCBYTE pbInput, + SIZE_T cbInput, + _Out_writes_bytes_( SYMCRYPT_KMAC256_RESULT_SIZE ) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptKmac256Ex( + _In_ PCSYMCRYPT_KMAC256_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbInput ) PCBYTE pbInput, + SIZE_T cbInput, + _Out_writes_bytes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptKmac256ExpandKey( + _Out_ PSYMCRYPT_KMAC256_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptKmac256ExpandKeyEx( + _Out_ PSYMCRYPT_KMAC256_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_bytes_( cbCustomizationString ) PCBYTE pbCustomizationString, + SIZE_T cbCustomizationString); + +VOID +SYMCRYPT_CALL +SymCryptKmac256Init( + _Out_ PSYMCRYPT_KMAC256_STATE pState, + _In_ PCSYMCRYPT_KMAC256_EXPANDED_KEY pExpandedKey); + +VOID +SYMCRYPT_CALL +SymCryptKmac256Append( + _Inout_ PSYMCRYPT_KMAC256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptKmac256Extract( + _Inout_ PSYMCRYPT_KMAC256_STATE pState, + _Out_writes_( cbOutput ) PBYTE pbOutput, + SIZE_T cbOutput, + BOOLEAN bWipe); + +VOID +SYMCRYPT_CALL +SymCryptKmac256Result( + _Inout_ PSYMCRYPT_KMAC256_STATE pState, + _Out_writes_( SYMCRYPT_KMAC256_RESULT_SIZE ) PBYTE pbResult); + +VOID +SYMCRYPT_CALL +SymCryptKmac256ResultEx( + _Inout_ 
PSYMCRYPT_KMAC256_STATE pState, + _Out_writes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptKmac256KeyCopy(_In_ PCSYMCRYPT_KMAC256_EXPANDED_KEY pSrc, _Out_ PSYMCRYPT_KMAC256_EXPANDED_KEY pDst); + +VOID +SYMCRYPT_CALL +SymCryptKmac256StateCopy(_In_ const SYMCRYPT_KMAC256_STATE* pSrc, _Out_ SYMCRYPT_KMAC256_STATE* pDst); + +VOID +SYMCRYPT_CALL +SymCryptKmac256Selftest(void); + +extern const PCSYMCRYPT_MAC SymCryptKmac256Algorithm; + + +//////////////////////////////////////////////////////////////////////////// +// POLY1305 +// +// Poly1305 is different from other MAC functions because a key can only +// be used safely for a single message. +// We do not follow the default API pattern for MAC functions as that invites +// callers to compute multiple MACs per key. +// + +#define SYMCRYPT_POLY1305_RESULT_SIZE (16) +#define SYMCRYPT_POLY1305_BLOCK_SIZE (16) +#define SYMCRYPT_POLY1305_KEY_SIZE (32) + +VOID +SYMCRYPT_CALL +SymCryptPoly1305( + _In_reads_( SYMCRYPT_POLY1305_KEY_SIZE ) PCBYTE pbKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_POLY1305_RESULT_SIZE ) PBYTE pbResult ); +// Compute a Poly1305 authentication with the provided key on the data buffer. +// Note: A Poly1305 key may only be used for a single message. + +VOID +SYMCRYPT_CALL +SymCryptPoly1305Init( + _Out_ PSYMCRYPT_POLY1305_STATE pState, + _In_reads_( SYMCRYPT_POLY1305_KEY_SIZE ) PCBYTE pbKey ); +// Starts an incremental Poly1305 computation. +// Note: A Poly1305 key may only be used for a single message. + +VOID +SYMCRYPT_CALL +SymCryptPoly1305Append( + _Inout_ PSYMCRYPT_POLY1305_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptPoly1305Result( + _Inout_ PSYMCRYPT_POLY1305_STATE pState, + _Out_writes_( SYMCRYPT_POLY1305_RESULT_SIZE ) PBYTE pbResult ); +// The state is wiped and not suitable for re-use. 
+ +VOID +SYMCRYPT_CALL +SymCryptPoly1305Selftest(void); + +// +// We do NOT define a SYMCRYPT_MAC structure SymCryptPoly1305Algorithm +// for Poly1305 as it is a 1-time MAC function and cannot safely be used +// by any KDF we have +// +// NOT DEFINED: extern const PCSYMCRYPT_MAC SymCryptPoly1305Algorithm; +// + +//////////////////////////////////////////////////////////////////////////// +// CHACHA20_POLY1305 +// +// This algorithm combines the CHACHA20 symmetric key stream cipher with +// the POLY1305 MAC function as per RFC 8439. +// The POLY1305 authenticator key is generated from the first 32 bytes +// of the CHACHA20 keystream and is only valid for a single message. +// For this reason each key and nonce combination passed to +// SymCryptChaCha20Poly1305Encrypt MUST only be used once. +// +// The Src and Dst buffers can be identical or non-overlapping; partial overlaps +// are not supported. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptChaCha20Poly1305Encrypt( + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, // Required. Key size MUST be 32 bytes. + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, // Required. Nonce size MUST be 12 bytes. + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, // Optional. Can be any size. + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, // Required. Max size is 274,877,906,880 bytes. + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag ); // Required. Tag size MUST be 16 bytes. + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptChaCha20Poly1305Decrypt( + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, // Required. Key size MUST be 32 bytes. + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, // Required. Nonce size MUST be 12 bytes. + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, // Optional. Can be any size. + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, // Required. 
Max size is 274,877,906,880 bytes. + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ); // Required. Tag size MUST be 16 bytes. + +VOID +SYMCRYPT_CALL +SymCryptChaCha20Poly1305Selftest(void); + +//////////////////////////////////////////////////////////////////////////// +// MARVIN32 +// +// Marvin is a checksum function optimized for speed on small inputs. +// IT IS NOT A CRYPTOGRAPHIC HASH FUNCTION. +// Marvin lacks the security properties of a cryptographic hash function. +// DO NOT USE FOR ANY SECURITY USE. +// +// A randomizable checksum function has essentially the same API as a MAC +// function. We use the SymCrypt MAC API here, with the difference +// that we use the word 'seed' rather than 'key'. +// +// See the description above of the generic MAC API for details on how +// these functions are used. Wherever the MAC API talks about keys, this +// applies to the seed for Marvin32. +// +// The randomization is useful for hash tables. +// There are DOS attacks where an attacker generates many inputs that +// hash to the same location in the hash table. Some hash table implementations +// then use O(n^2) CPU time, allowing a DOS attack. +// The randomization provided by the seed avoids this attack if: +// - The seed is unpredictable and unknown to the attacker. +// - The attacker cannot learn information about the output of the checksum function. +// In particular, if an attacker can measure how long it takes to add each +// element in a hash table, they might be able to determine enough information about +// the output of the checksum function to recover the seed. Of course, +// once that is done the DOS attack is once again possible. +// +// SymCrypt provides a default seed for applications that don't need a seed. +// +// FUTURE IMPROVEMENTS: +// At the moment it is relatively expensive to change the seed. +// If needed, we can add a facility to modify the seed faster than +// re-running the ExpandSeed function. 
+// + +#define SYMCRYPT_MARVIN32_RESULT_SIZE (8) +#define SYMCRYPT_MARVIN32_SEED_SIZE (8) +#define SYMCRYPT_MARVIN32_INPUT_BLOCK_SIZE (4) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMarvin32ExpandSeed( + _Out_ PSYMCRYPT_MARVIN32_EXPANDED_SEED pExpandedSeed, + _In_reads_(cbSeed) PCBYTE pbSeed, + SIZE_T cbSeed ); +// +// The seed must be 8 bytes (= SYMCRYPT_MARVIN32_SEED_SIZE). +// Use of the all-zero seed is not recommended as it has some undesirable properties. +// Note that a pre-expanded default seed is provided for applications that do not wish to control +// their seed. Such applications do not need to call SymCryptMarvin32ExpandSeed +// + +extern PCSYMCRYPT_MARVIN32_EXPANDED_SEED const SymCryptMarvin32DefaultSeed; + +PCSYMCRYPT_MARVIN32_EXPANDED_SEED +SYMCRYPT_CALL +SymCryptGetMarvin32DefaultSeed( void ); +// +// Returns a pointer to the default Marvin32 seed. +// + +VOID +SYMCRYPT_CALL +SymCryptMarvin32SeedCopy( _In_ PCSYMCRYPT_MARVIN32_EXPANDED_SEED pSrc, + _Out_ PSYMCRYPT_MARVIN32_EXPANDED_SEED pDst ); + +VOID +SYMCRYPT_CALL +SymCryptMarvin32( + _In_ PCSYMCRYPT_MARVIN32_EXPANDED_SEED pExpandedSeed, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_MARVIN32_RESULT_SIZE ) PBYTE pbResult ); +// +// If the application does not wish to use a seed, a default expanded seed is provided. +// Callers can pass SymCryptMarvin32DefaultSeed as the first argument. 
+// + +VOID +SYMCRYPT_CALL +SymCryptMarvin32StateCopy( + _In_ PCSYMCRYPT_MARVIN32_STATE pSrc, + _In_opt_ PCSYMCRYPT_MARVIN32_EXPANDED_SEED pExpandedSeed, + _Out_ PSYMCRYPT_MARVIN32_STATE pDst ); + + +VOID +SYMCRYPT_CALL +SymCryptMarvin32Init( _Out_ PSYMCRYPT_MARVIN32_STATE pState, + _In_ PCSYMCRYPT_MARVIN32_EXPANDED_SEED pExpandedSeed); + +VOID +SYMCRYPT_CALL +SymCryptMarvin32Append( _Inout_ PSYMCRYPT_MARVIN32_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptMarvin32Result( + _Inout_ PSYMCRYPT_MARVIN32_STATE pState, + _Out_writes_( SYMCRYPT_MARVIN32_RESULT_SIZE ) PBYTE pbResult ); + + +VOID +SYMCRYPT_CALL +SymCryptMarvin32Selftest(void); + + +//========================================================================== +// BLOCK CIPHERS +//========================================================================== +// +// For a block cipher XXX the following minimal functions, types, and constants are defined: +// +// SYMCRYPT_XXX_BLOCK_SIZE +// +// A constant giving the block size, in bytes, of the algorithm. +// +// +// SYMCRYPT_XXX_EXPANDED_KEY +// Type which contains a key with all the pre-computations performed. +// This is an opaque type whose structure can change at will. +// It should only be used for transient computations in a single executable +// and not be stored or transferred to a different environment. +// The pointer and const-pointer versions are also declared +// (PSYMCRYPT_XXX_EXPANDED_KEY and PCSYMCRYPT_XXX_EXPANDED_KEY). +// +// The EXPANDED_KEY structure contains keying material and should be wiped +// once it is no longer used. (See SymCryptWipe & SymCryptWipeKnownSize) +// +// Once initialized, multiple threads can use the same expanded key object simultaneously +// for different block cipher computations as the expanded key is not modified once initialized.
+// +// SymCryptXxxBlockCipher +// A SYMCRYPT_BLOCKCIPHER structure that provides a description +// of the block cipher and its primary functions. This is used by cipher modes to pass +// all the block-cipher specific information in a single structure. +// +// +// SYMCRYPT_ERROR +// SYMCRYPT_CALL +// SymCryptXxxExpandKey( _Out_ PSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _In_reads_(cbKey) PCBYTE pbKey, +// SIZE_T cbKey ); +// +// Prepare a key for future use by the Xxx algorithm. +// This function performs pre-computations on the key +// to speed up the actual block cipher computations later, and stores the result as an expanded key. +// The expanded key must be kept unchanged until all computations that use the key are finished. +// When the key is no longer needed the expanded key structure should be wiped. +// +// Different algorithms pose different requirements on the length of the key. +// If the key that is provided is of an unsupported length the SYMCRYPT_WRONG_KEY_SIZE error is returned. +// In this case the expanded key structure will not contain any keying material and does not have to be wiped. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxEncrypt( _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _In_reads_( SYMCRYPT_XXX_BLOCK_SIZE ) PCBYTE pbSrc, +// _Out_writes_( SYMCRYPT_XXX_BLOCK_SIZE ) PBYTE pbDst ); +// +// Encrypt a single block. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxDecrypt( _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _In_reads_( SYMCRYPT_XXX_BLOCK_SIZE ) PCBYTE pbSrc, +// _Out_writes_( SYMCRYPT_XXX_BLOCK_SIZE ) PBYTE pbDst ); +// +// Decrypt a single block. +// +// +// -------------------------------------------------------------------------------------------------------------- +// In addition to these elementary encrypt block/decrypt block functions a block cipher may also implement +// optimized versions of CBC encryption, CBC decryption, CBC-MAC, and CTR encryption. Not all block ciphers +// do implement these. 
+// All block cipher modes are always available through the generic block cipher mode functions. +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxCbcEncrypt( +// _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _Inout_updates_( SYMCRYPT_XXX_BLOCK_SIZE ) PBYTE pbChainingValue, +// _In_reads_( cbData ) PCBYTE pbSrc, +// _Out_writes_( cbData ) PBYTE pbDst, +// SIZE_T cbData ); +// +// Encrypt data using the CBC chaining mode. +// On entry the pbChainingValue is the IV which is xorred into the first plaintext block of the CBC encryption. +// On exit the pbChainingValue is updated to the last ciphertext block of the result. +// This allows a longer CBC encryption to be done incrementally. +// +// cbData must be a multiple of the block size. For efficiency reasons this routine does not return an error +// if cbData is not a proper multiple; instead the result is undefined. The routine might hang, +// round cbData down to a multiple of the block size, or return random data that cannot be decrypted. +// +// The pbSrc and pbDst buffers may be the same, or they may be non-overlapping. However, they may +// not be partially overlapping. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxCbcDecrypt( +// _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _Inout_updates_( SYMCRYPT_XXX_BLOCK_SIZE ) PBYTE pbChainingValue, +// _In_reads_( cbData ) PCBYTE pbSrc, +// _Out_writes_( cbData ) PBYTE pbDst, +// SIZE_T cbData ); +// +// Decrypt data using the CBC chaining mode. +// On entry the pbChainingValue is the IV to be xorred into the first plaintext block of the CBC decryption. +// On exit the pbChainingValue is updated to the last ciphertext block of the input. +// This allows a longer CBC decryption to be done incrementally. +// +// cbData must be a multiple of the block size. For efficiency reasons this routine does not return an error +// if cbData is not a proper multiple; instead the result is undefined. 
The routine might hang, +// round cbData down to a multiple of the block size, or return random data. +// +// The pbSrc and pbDst buffers may be the same, or they may be non-overlapping. However, they may +// not be partially overlapping. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxCbcMac( +// _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _Inout_updates_( SYMCRYPT_XXX_BLOCK_SIZE ) PBYTE pbChainingValue, +// _In_reads_( cbData ) PCBYTE pbData, +// SIZE_T cbData ); +// +// Compute a CBC-MAC on the input data. +// On entry the pbChainingValue is the current chaining state of the CBC-MAC computation; this routine +// updates the state to reflect the chaining state after MACing the data. +// cbData must be a multiple of the block size. +// This function is NOT intended for general use; rather it is a high-performance primitive to support +// implementations of other cipher modes like CCM and CMAC. +// Note: If a key is used for CBC-MAC computations it should NOT be used for any encryptions. +// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxCtrMsb64( +// _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, +// _Inout_updates_( SYMCRYPT_XXX_BLOCK_SIZE ) PBYTE pbChainingValue, +// _In_reads_( cbData ) PCBYTE pbSrc, +// _Out_writes_( cbData ) PBYTE pbDst, +// SIZE_T cbData ); +// +// Perform a CTR encryption on the data. (Note: CTR encryption and decryption are the same operation.) +// On entry pbChainingValue contains the first counter value to be used. On exit it contains +// the next counter value to be used. +// The increment function treats the last 8 bytes of the pbChainingValue string as an integer +// in most-significant-byte-first format, and increments this integer. +// Thus, the last byte is incremented the fastest. +// The pbSrc and pbDst buffers may be identical or non-overlapping, but they may not partially overlap. +// cbData must be a multiple of the block size. 
+// +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxSelftest(void); +// +// Perform a minimal self-test on the XXX algorithm. +// This function is designed to be used for achieving FIPS 140-2 compliance or +// to provide a simple self-test when an application starts. +// +// If an error is detected the fatal callback routine is called. +// +// We do not provide self-tests for the various cipher modes. There are too many +// (block cipher, key size, cipher mode) combinations and CNG performs the self tests +// on the outside APIs, not on the internal APIs. +// We retain a self test on the basic algorithm to help internal library testing. + + + +//////////////////////////////////////////////////////////////////////////// +// AES +// +// The AES block cipher per FIPS 197 +// +// WARNING: +// Unless this code is running on a CPU with AES-NI instructions, +// the AES implementation makes extensive use of table lookups to implement the S-boxes of the algorithm. +// This violates our current crypto implementation guidelines and opens up a possible side-channel attack +// through information leakage via the memory caching system of the CPU. +// +// Unfortunately there is no known software fix for this that does not lead to an order of magnitude performance loss. +// An implementation that is 10x slower will not be used by anybody and is useless, so we implement a fast +// version that uses table lookups. (Just like all other systems we know of.) +// +// The risk of this type of side-channel attack is limited as it requires malicious code to run on the same +// machine as the code being attacked. +// +// At the time of writing (Apr 2007) there are no approved alternative encryption algorithms that do not +// use table lookups. NIST and NSA are aware of this problem, but so far we have not seen any indication +// that they consider this important enough to create an alternative encryption algorithm that does not +// rely on table lookups as much. 
+// + +#define SYMCRYPT_AES_BLOCK_SIZE (16) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesExpandKey( + _Out_ PSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); + +// +// The SymCryptAesExpandKeyEncryptOnly creates an AES-expanded key that can ONLY be used +// for AES encryption operations. There are no safeguards when you use it for decryption; you get the wrong +// result if you try. +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesExpandKeyEncryptOnly( + _Out_ PSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); + +VOID +SYMCRYPT_CALL +SymCryptAesKeyCopy( _In_ PCSYMCRYPT_AES_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_AES_EXPANDED_KEY pDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesEncrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesDecrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesEcbEncrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesEcbDecrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcEncrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcDecrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) 
PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcMac( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb64( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +// +// There are many optimized implementations for various AES modes. +// To test them all would pull in all the code for these modes. +// We solve this by letting the caller specify a bitmask of modes to be tested. +// Under the following circumstances this will avoid pulling in unnecessary code: +// - The argument is a compile-time constant. +// - The compiler implements the usual constant propagation optimizations. +// +// Note: GCM, CCM, and XTS are NOT tested by this function. + +#define SYMCRYPT_AES_SELFTEST_BASE 0x01 // tests AesEncrypt & AesDecrypt +#define SYMCRYPT_AES_SELFTEST_ECB 0x02 // ECB mode +#define SYMCRYPT_AES_SELFTEST_CBC 0x04 // CBC mode +#define SYMCRYPT_AES_SELFTEST_CBCMAC 0x08 // CBC-mac +#define SYMCRYPT_AES_SELFTEST_CTR 0x10 // all CTR modes + +#define SYMCRYPT_AES_SELFTEST_ALL 0x1f + +VOID +SYMCRYPT_CALL +SymCryptAesSelftest( UINT32 maskTestsToRun ); + +extern const PCSYMCRYPT_BLOCKCIPHER SymCryptAesBlockCipher; + + +//////////////////////////////////////////////////////////////////////////// +// DES +// +// The DES block cipher per FIPS-46-3 +// +// WARNING: +// DES is no longer considered secure and should not be used. +// Per the Crypto SDL, any use of DES in Microsoft code requires a Crypto board exemption +// +// The DES implementation makes extensive use of table lookups to implement the S-boxes of the algorithm. 
+// This violates our current crypto implementation guidelines and opens up a possible side-channel attack +// through information leakage via the memory caching system of the CPU. +// + +#define SYMCRYPT_DES_BLOCK_SIZE (8) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDesExpandKey( + _Out_ PSYMCRYPT_DES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// The key must be 8 bytes long. The parity bits in the key are ignored and can be any value. +// + +VOID +SYMCRYPT_CALL +SymCryptDesEncrypt( + _In_ PCSYMCRYPT_DES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_DES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_DES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptDesDecrypt( + _In_ PCSYMCRYPT_DES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_DES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_DES_BLOCK_SIZE ) PBYTE pbDst ); + + +VOID +SYMCRYPT_CALL +SymCryptDesSetOddParity( + _Inout_updates_( cbData ) PBYTE pbData, + _In_ SIZE_T cbData ); +// +// Set each byte to have odd parity by possibly flipping bit 0. +// This is the parity used by DES, and is needed for compatibility. +// The parity bit is ignored by the DES key expansion. +// + +VOID +SYMCRYPT_CALL +SymCryptDesSelftest(void); + +extern const PCSYMCRYPT_BLOCKCIPHER SymCryptDesBlockCipher; + +//////////////////////////////////////////////////////////////////////////// +// 3DES +// +// The triple-DES block cipher +// +// WARNING: +// The DES implementation makes extensive use of table lookups to implement the S-boxes of the algorithm. +// This violates our current crypto implementation guidelines and opens up a possible side-channel attack +// through information leakage via the memory caching system of the CPU. 
+// + +#define SYMCRYPT_3DES_BLOCK_SIZE (8) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCrypt3DesExpandKey( + _Out_ PSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// If the provided key is 24 bytes long this expands a 3-key 3DES key. If 16 bytes are provided it +// expands a 2-key 3DES. If 8 bytes are provided it creates the 3-key equivalent of the single +// key des encryption. The parity bits in the key are ignored. +// + +VOID +SYMCRYPT_CALL +SymCrypt3DesEncrypt( + _In_ PCSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_3DES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_3DES_BLOCK_SIZE )PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCrypt3DesDecrypt( + _In_ PCSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_3DES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_3DES_BLOCK_SIZE )PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCrypt3DesCbcEncrypt( + _In_ PCSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_3DES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCrypt3DesCbcDecrypt( + _In_ PCSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_3DES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCrypt3DesSelftest(void); + +extern const PCSYMCRYPT_BLOCKCIPHER SymCrypt3DesBlockCipher; + +//////////////////////////////////////////////////////////////////////////// +// DESX +// +// The DESX block cipher. +// +// Use of DESX is not recommended. 
+// + +#define SYMCRYPT_DESX_BLOCK_SIZE (8) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDesxExpandKey( + _Out_ PSYMCRYPT_DESX_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); + +VOID +SYMCRYPT_CALL +SymCryptDesxEncrypt( + _In_ PCSYMCRYPT_DESX_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_DESX_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_DESX_BLOCK_SIZE )PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptDesxDecrypt( + _In_ PCSYMCRYPT_DESX_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_DESX_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_DESX_BLOCK_SIZE )PBYTE pbDst ); + + +VOID +SYMCRYPT_CALL +SymCryptDesxSelftest(void); + +extern const PCSYMCRYPT_BLOCKCIPHER SymCryptDesxBlockCipher; + +//////////////////////////////////////////////////////////////////////////// +// RC2 +// +// The RC2 block cipher +// +// WARNING: +// Use of RC2 is not recommended for many reasons. +// +// The RC2 implementation makes extensive use of table lookups to implement the S-boxes of the algorithm. +// This violates our current crypto implementation guidelines and opens up a possible side-channel attack +// through information leakage via the memory caching system of the CPU. +// + +#define SYMCRYPT_RC2_BLOCK_SIZE (8) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRc2ExpandKey( + _Out_ PSYMCRYPT_RC2_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); +// +// The default effective key size is 8*cbKey. Note that this is NOT the default used in +// the old RSA32 library which used a default effective key size of 40 bits. +// That is too dangerous a default to implement. We chose 8*cbKey rather than 1024 as +// our choice provides slightly better mixing of the key bytes into the expanded key. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRc2ExpandKeyEx( + _Out_ PSYMCRYPT_RC2_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey, + UINT32 effectiveKeySizeInBits ); +// +// RC2 has an option to limit the effective key size, which means the key expansion function has an extra +// parameter. +// +// The effective key size in bits may be any value from 9..1024. If it is larger than 8*cbKey it does +// not significantly affect the key strength. However, the expanded key will always depend on the +// effective key size; expanding the same string of key bytes with different effective key sizes leads +// to different expanded keys and different encryption functions. +// +// The original default was an effective key size of 40 bits. +// +// Do not allow your attacker to choose the effective key size. RC2 seems vulnerable to +// related-effective-key-size attacks. +// + +VOID +SYMCRYPT_CALL +SymCryptRc2Encrypt( + _In_ PCSYMCRYPT_RC2_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_RC2_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_RC2_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptRc2Decrypt( + _In_ PCSYMCRYPT_RC2_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_RC2_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_RC2_BLOCK_SIZE ) PBYTE pbDst ); + + +VOID +SYMCRYPT_CALL +SymCryptRc2Selftest(void); + +extern const PCSYMCRYPT_BLOCKCIPHER SymCryptRc2BlockCipher; + + +//========================================================================== +// BLOCK CIPHER MODES +//========================================================================== +// +// Block cipher modes use the block cipher description tables to implement +// the various modes in a block-cipher independent way. +// +// Some block ciphers implement optimized versions of the block cipher modes. +// These functions call that optimized version, but calling the block-cipher specific +// function has less overhead. 
+// +// Note that these functions will only work with SymCrypt-provided block ciphers. +// They are not designed to be used with externally provided block ciphers. +// (The SYMCRYPT_BLOCKCIPHER structure is a private one not available to callers.) +// + +typedef enum _SYMCRYPT_BLOCKCIPHER_ID +{ + SYMCRYPT_BLOCKCIPHER_ID_NULL = 0, + SYMCRYPT_BLOCKCIPHER_ID_AES = 1, + SYMCRYPT_BLOCKCIPHER_ID_DES = 2, + SYMCRYPT_BLOCKCIPHER_ID_3DES = 3, + SYMCRYPT_BLOCKCIPHER_ID_DESX = 4, + SYMCRYPT_BLOCKCIPHER_ID_RC2 = 5 +} SYMCRYPT_BLOCKCIPHER_ID; + +PCSYMCRYPT_BLOCKCIPHER +SYMCRYPT_CALL +SymCryptGetBlockCipher( SYMCRYPT_BLOCKCIPHER_ID blockCipherId ); +// +// Returns a pointer to the block cipher structure for the specified block cipher ID. +// Returns NULL if the block cipher ID is invalid. +// + +VOID +SYMCRYPT_CALL +SymCryptEcbEncrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// Generic ECB encryption routine for block ciphers. +// +// - pBlockCipher is a pointer to the block cipher description table. +// Suitable description tables for all ciphers in this library have been pre-defined. +// - pExpandedKey points to the expanded key to use. This generic function uses PVOID so there +// is no type safety to ensure that the expanded key and the encryption function match. +// - pbSrc is the plaintext input buffer. The plaintext and ciphertext buffers may be +// identical (in-place encryption) or non-overlapping, but they may not partially overlap. +// - cbData. Number of bytes to encrypt. This must be a multiple of the block size. +// - pbDst is the result buffer. It may be identical to pbSrc or non-overlapping, +// but it may not partially overlap with the pbSrc buffer. 
+// + +VOID +SYMCRYPT_CALL +SymCryptEcbDecrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// Generic ECB decryption routine for block ciphers. +// +// - pBlockCipher is a pointer to the block cipher description table. +// Suitable description tables for all ciphers in this library have been pre-defined. +// - pExpandedKey points to the expanded key to use. This generic function uses PVOID so there +// is no type safety to ensure that the expanded key and the decryption function match. +// - pbSrc is the ciphertext input buffer. The ciphertext and plaintext buffers may be +// identical (in-place decryption) or non-overlapping, but they may not partially overlap. +// - cbData. Number of bytes to decrypt. This must be a multiple of the block size. +// - pbDst is the result buffer. It may be identical to pbSrc or non-overlapping, +// but it may not partially overlap with the pbSrc buffer. +// + + +VOID +SYMCRYPT_CALL +SymCryptCbcEncrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +// +// Generic CBC encryption routine for block ciphers. +// +// - pBlockCipher is a pointer to the block cipher description table. +// Suitable description tables for all ciphers in this library have been pre-defined. +// - pExpandedKey points to the expanded key to use. This generic function uses PVOID so there +// is no type safety to ensure that the expanded key and the encryption function match. +// - pbChainingValue points to the chaining value. On entry it is the IV value for the CBC +// encryption, on return it is the last ciphertext block. 
A long message can be encrypted +// piecewise in multiple calls; at the end of one call the pbChainingValue buffer will contain +// the correct chaining value for encrypting the next piece of the message. +// Once the encryption is finished the value in the chaining buffer is no longer needed. +// - pbSrc is the plaintext input buffer. The plaintext and ciphertext buffers may be +// identical (in-place encryption) or non-overlapping, but they may not partially overlap. +// - cbData. Number of bytes to encrypt. This must be a multiple of the block size. +// - pbDst is the result buffer. It may be identical to pbSrc or non-overlapping, +// but it may not partially overlap with the pbSrc buffer. +// + + +VOID +SYMCRYPT_CALL +SymCryptCbcDecrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +// +// This is the decryption version of SymCryptCbcEncrypt. +// All parameters have the same explanation and restrictions. +// + + +VOID +SYMCRYPT_CALL +SymCryptCbcMac( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + SIZE_T cbData ); +// +// This function implements the same function as SymCryptCbcEncrypt except that +// it does not produce a ciphertext output. +// All other restrictions apply. +// The pbChainingValue is the only output provided. +// +// This is the primitive operation used by other modes of operation, +// and some platforms have special optimizations for this primitive. +// As we expose special APIs for some algorithms, we provide the generic function so that it +// can be used for all algorithms. 
+// + + +VOID +SYMCRYPT_CALL +SymCryptCtrMsb64( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// This function implements the CTR cipher mode. +// It is not intended to be used as-is, rather it is a building block for modes like CCM. +// On some platforms we have optimized code for AES-CTR, on other platforms +// we use this generic construction to achieve the same effect. +// +// Note that in CTR mode encryption and decryption are the same operation. +// +// - pBlockCipher is a pointer to the block cipher description table. +// Suitable description tables for all ciphers in this library have been pre-defined. +// - pExpandedKey points to the expanded key to use. This generic function uses PVOID so there +// is no type safety to ensure that the expanded key and the encryption function match. +// - pbChainingValue points to the chaining value. On entry it is the first counter value to be +// used. On exit it is the next counter value to be used. +// The pbChainingValue is incremented by cbData/blockSize. +// The increment function treats the last 8 bytes of pbChainingValue as an MSB-first integer +// and increments the integer representation by one for each block. +// - pbSrc is the input data buffer that will be encrypted/decrypted. +// - cbData. Number of bytes to encrypt/decrypt. This must be a multiple of the block size. +// - pbDst is the output buffer that receives the encrypted/decrypted data. The input and output +// buffers may be the same or non-overlapping, but may not partially overlap. 
+// + +VOID +SYMCRYPT_CALL +SymCryptCfbEncrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + SIZE_T cbShift, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// Encrypt a buffer using the CFB cipher mode. +// +// This implements the CFB mode, with selected shift amount (in bytes). +// In general, one block cipher encryption is used for each cbShift bytes +// of plaintext, which can be slow. +// Use of this cipher mode is not recommended. +// +// - pBlockCipher is a pointer to the block cipher description table. +// Suitable description tables for all ciphers in this library have been pre-defined. +// - cbShift is the shift value (in bytes) of the CFB mode. +// The only supported values are 1 and the block size. +// - pExpandedKey points to the expanded key to use. This generic function uses PVOID so there +// is no type safety to ensure that the expanded key and the encryption function match. +// - pbChainingValue points to the chaining value. On entry and exit it +// contains the last blockSize ciphertext bytes. +// - pbSrc is the input data buffer that will be encrypted/decrypted. +// - cbData. Number of bytes to encrypt/decrypt. +// Must be a multiple of cbShift, or a multiple of the block size if cbShift = 0. +// - pbDst is the output buffer that receives the encrypted/decrypted data. The input and output +// buffers may be the same or non-overlapping, but may not partially overlap. +// + +VOID +SYMCRYPT_CALL +SymCryptCfbDecrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + SIZE_T cbShift, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// The corresponding decryption routine. 
+// + +VOID +SYMCRYPT_CALL +SymCryptPaddingPkcs7Add( + SIZE_T cbBlockSize, + _In_reads_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst, + SIZE_T cbDst, + SIZE_T* pcbResult); +// +// Prerequisites: +// cbBlockSize is a power of 2 and < 256 +// cbDst >= cbSrc - cbSrc % cbBlockSize + cbBlockSize +// +// Add PKCS7 block padding to a message. +// The input data (pbSrc,cbSrc) is padded with between 1 and cbBlockSize bytes so that +// the length of the result is a multiple of cbBlockSize. +// The padded message is written to the pbDst buffer. +// The length of the padded message is returned in *pcbResult. +// +// If pbSrc == pbDst this function avoids copying all the data. +// Note that cbSrc == cbDst is not valid as it violates the prerequisites. +// Padding a message with cbSrc == 0 is valid. +// +// Note: +// Any whole blocks in Src are merely copied to Dst. +// Callers can either process the whole message in this call, +// or handle the whole blocks themselves and only pass the last few bytes of the message to this function. +// +// Note: the prerequisites are not checked by this function; if they are not satisfied +// the behaviour of the function is undefined. +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptPaddingPkcs7Remove( + SIZE_T cbBlockSize, + _In_reads_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst, + SIZE_T cbDst, + SIZE_T* pcbResult); +// +// Prerequisites: +// - cbBlockSize is a power of 2 and < 256 +// - cbSrc is a multiple of cbBlockSize +// - cbSrc is greater than zero (at least equal to cbBlockSize) +// +// Remove PKCS7 block padding from a message in a side-channel safe way. +// *** see below for important rules the caller should follow w.r.t. side-channel safety *** +// The input data (pbSrc, cbSrc) is a valid PKCS7 padded message for the given blocksize. 
+// This function removes the padding, copies the result to the (pbDst, cbDst) buffer, +// and returns the size of the result in *pcbResult. +// +// This function only supports padding with a size up to the block size. +// +// If pbSrc == pbDst this function avoids copying data. +// +// The following errors are returned: +// - SYMCRYPT_INVALID_ARGUMENT if cbSrc or the padding is invalid +// - SYMCRYPT_BUFFER_TOO_SMALL if cbDst < size of the unpadded message +// If cbDst >= cbSrc the SYMCRYPT_BUFFER_TOO_SMALL error will not be returned. +// Even if an error is returned, the pbDst buffer may or may not contain data from the message. +// Callers should wipe the buffer even if an error is returned. +// +// Note: Removal of PKCS7 padding is extremely sensitive to side channels. +// For example, if a message is encrypted with AES-CBC and the attacker can modify +// the ciphertext and then determine whether a padding error occurs during decryption, +// then the attacker can use the presence or absence of the error to decrypt the message itself. +// This function takes great care not to reveal whether an error occurred, and hides +// the size of the unpadded message. This is even true when writing to pbDst. If cbDst is large +// enough, the code will write cbSrc-1 bytes to pbDst, using masking to only update the bytes of the +// message and leaving the other bytes in pbDst unchanged. +// Callers should take great care not to reveal the returned error or success, +// or the size of the returned message, until they have authenticated +// the source of the data. +// +// In particular, any mapping of the error code should be done in a side-channel safe way. +// See the SymCryptMapUint32() function for a side-channel safe way to map error codes. +// +// The error caused by an invalid cbSrc value is not hidden from side channels as this does not reveal any +// secret information. 
+// +// Note: callers can either process the whole message in this call, +// or process the whole blocks themselves and only pass the last block to this function. + +//////////////////////////// +// CCM +//////////////////////////// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCcmValidateParameters( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ SIZE_T cbNonce, + _In_ SIZE_T cbAssociatedData, + _In_ UINT64 cbData, + _In_ SIZE_T cbTag + ); +// +// To achieve maximum performance, CCM functions do not check for valid parameters. +// Passing invalid parameters can lead to buffer overflows. +// Callers who want to validate their CCM parameters can call this function. +// Note: In Checked builds some CCM functions might fatal out when invalid parameters are +// passed. +// + + +VOID +SYMCRYPT_CALL +SymCryptCcmEncrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag ); + +// +// Encrypt a buffer using the block cipher in CCM mode. +// - pBlockCipher points to the block cipher description table. +// - pExpandedKey points to the expanded key for the block cipher. +// - pbNonce: Pointer to the nonce for this encryption. For a single key, each nonce +// value may be used at most once to encrypt data. Re-using nonce values leads +// to catastrophic loss of security. +// - cbNonce: number of bytes in the nonce: 7 <= cbNonce <= 13. +// - pbAuthData: pointer to the associated authentication data. This data is not encrypted +// but it is included in the authentication. Use NULL if not used. +// - cbAuthData: # bytes of associated authentication data. (0 if not used) +// - pbSrc: plaintext input +// - pbDst: ciphertext output. 
The ciphertext buffer may be identical to the plaintext +// buffer, or non-overlapping. The ciphertext is also cbData bytes long. +// - cbData: # bytes of plaintext input. The maximum length is 2^{8(15-cbNonce)} - 1 bytes. +// - pbTag: buffer that will receive the authentication tag. +// - cbTag: size of tag. cbTag must be one of {4, 6, 8, 10, 12, 14, 16}. +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCcmDecrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ); +// +// Decrypt a buffer using the block cipher in CCM mode. +// See SymCryptCcmEncrypt for a description of the parameters. This function decrypts rather than +// encrypts, and as a result the pbTag parameter is read rather than filled. +// +// If the tag value is not correct the SYMCRYPT_AUTHENTICATION_FAILURE error is returned and the pbDst buffer +// is wiped of any plaintext. +// Note: While checking the authentication the purported plaintext is stored in pbDst. It is not safe to reveal +// purported plaintext when the authentication has not been checked. (Doing so would reveal key stream information +// that can be used to decrypt any message encrypted with the same nonce value.) Thus, users should be careful +// to not reveal the pbDst buffer until this function returns (e.g. through other threads or sharing memory). +// + +// +// We also provide functions for incremental computation of CCM encryption and decryption. See the functions +// above for a description of the parameters and restrictions. +// In particular, note that the restriction on revealing the plaintext for unauthenticated decryptions holds +// for all the decrypted data, even when the decryption is done incrementally. 
+// +// SYMCRYPT_CCM_STATE +// Ongoing state of an incremental CCM encryption or decryption operation. +// + +VOID +SYMCRYPT_CALL +SymCryptCcmInit( + _Out_ PSYMCRYPT_CCM_STATE pState, + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + UINT64 cbData, + SIZE_T cbTag ); +// +// Initialize a CCM computation. Note that the ultimate data length has to be provided. +// The pBlockCipher and pExpandedKey structures must remain unchanged until the CCM computation is finished. +// + +VOID +SYMCRYPT_CALL +SymCryptCcmEncryptPart( + _Inout_ PSYMCRYPT_CCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptCcmEncryptFinal( + _Inout_ PSYMCRYPT_CCM_STATE pState, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag ); +// +// Note: passing cbTag is redundant but necessary for SAL purposes. +// + +VOID +SYMCRYPT_CALL +SymCryptCcmDecryptPart( + _Inout_ PSYMCRYPT_CCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCcmDecryptFinal( + _Inout_ PSYMCRYPT_CCM_STATE pState, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ); +// +// WARNING: When the authentication fails the data already decrypted may not be revealed. +// This function cannot wipe the plaintext buffers; the caller is responsible for ensuring +// the plaintext is not revealed. +// + +VOID +SYMCRYPT_CALL +SymCryptCcmSelftest(void); +// +// Self test for CCM cipher mode +// + +/////////////////////////////////////// +// GCM +/////////////////////////////////////// +// +// The GCM algorithm per SP 800-38D. +// GMAC is just GCM with an empty data string; all the data is put in the pbAuthData buffer. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptGcmValidateParameters( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ SIZE_T cbNonce, + _In_ UINT64 cbAssociatedData, + _In_ UINT64 cbData, + _In_ SIZE_T cbTag + ); +// +// To achieve maximum performance, GCM functions do not check for valid parameters. +// Passing invalid parameters can lead to buffer overflows. +// Callers who want to validate their GCM parameters can call this function. +// Note: In Checked builds some GCM functions might fatal out when invalid parameters are +// passed. +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptGcmExpandKey( + _Out_ PSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey ); +// +// Create an expanded key suitable for GCM +// + +VOID +SYMCRYPT_CALL +SymCryptGcmKeyCopy( _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pSrc, _Out_ PSYMCRYPT_GCM_EXPANDED_KEY pDst ); + +// +// Create a copy of an expanded key +// + +VOID +SYMCRYPT_CALL +SymCryptGcmEncrypt( + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag ); + +// +// Encrypt a buffer using the block cipher in GCM mode. +// - pExpandedKey points to the expanded key for GCM. +// - pbNonce: Pointer to the nonce for this encryption. For a single key, each nonce +// value may be used at most once to encrypt data. Re-using nonce values leads +// to catastrophic loss of security. Only 12-byte nonces are supported, +// per the SP800-38D section 5.2.1.1 recommendation. +// - cbNonce: number of bytes in the nonce, must be 12. +// - pbAuthData: pointer to the associated authentication data. This data is not encrypted +// but it is included in the authentication. Use NULL if not used. 
+// - cbAuthData: # bytes of associated authentication data. (0 if not used) +// - pbSrc: plaintext input +// - pbDst: ciphertext output. The ciphertext buffer may be identical to the plaintext +// buffer, or non-overlapping. The ciphertext is also cbData bytes long. +// - cbData: # bytes of plaintext input. The maximum length is 2^{36} - 32 bytes. +// - pbTag: buffer that will receive the authentication tag. +// - cbTag: size of tag. cbTag must be one of {12, 13, 14, 15, 16} per SP800-38D +// section 5.2.1.2. The optional shorter tag sizes (4 and 8) are not supported. +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptGcmDecrypt( + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ); +// +// Decrypt a buffer using the block cipher in GCM mode. +// See SymCryptGcmEncrypt for a description of the parameters. This function decrypts rather than +// encrypts, and as a result the pbTag parameter is read rather than filled. +// If the tag value is not correct the SYMCRYPT_AUTHENTICATION_FAILURE error is returned and the pbDst buffer +// is wiped of any plaintext. +// Note: While checking the authentication the purported plaintext is stored in pbDst. It is not safe to reveal +// purported plaintext when the authentication has not been checked. (Doing so would reveal key stream information +// that can be used to decrypt any message encrypted with the same nonce value.) Thus, users should be careful +// to not reveal the pbDst buffer until this function returns (e.g. through other threads or sharing memory). +// + +// +// We also provide functions for incremental computation of GCM encryption and decryption. See the functions +// above for a description of the parameters and restrictions. 
+// In particular, note that the restriction on revealing the plaintext for unauthenticated decryptions holds +// for all the decrypted data, even when the decryption is done incrementally. +// +// +// SYMCRYPT_GCM_STATE +// Ongoing state of an incremental GCM encryption or decryption operation. +// + +VOID +SYMCRYPT_CALL +SymCryptGcmInit( + _Out_ PSYMCRYPT_GCM_STATE pState, + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce ); +// +// Initialize a GCM computation. +// The pExpandedKey structure must remain unchanged until the GCM computation is finished. +// + +VOID +SYMCRYPT_CALL +SymCryptGcmStateCopy( + _In_ PCSYMCRYPT_GCM_STATE pSrc, + _In_opt_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKeyCopy, + _Out_ PSYMCRYPT_GCM_STATE pDst ); +// +// Copy a GCM state. +// If pExpandedKeyCopy is NULL, then the new pDst state uses the same expanded key as pSrc. +// If pExpandedKeyCopy is not NULL, it must point to a copy of the expanded key of the pSrc state. +// This new expanded key will be used as the expanded key for pDst. +// + +VOID +SYMCRYPT_CALL +SymCryptGcmAuthPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_opt_( cbData ) PCBYTE pbAuthData, + SIZE_T cbData ); +// +// Incrementally process the authentication data. This function can be called multiple times +// after the SymCryptGcmInit function. It may not be called after any encrypt or decrypt +// function has been called on the GCM state.
+// + +VOID +SYMCRYPT_CALL +SymCryptGcmEncryptPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptGcmEncryptFinal( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag ); + +VOID +SYMCRYPT_CALL +SymCryptGcmDecryptPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptGcmDecryptFinal( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ); +// +// Returns SYMCRYPT_AUTHENTICATION_FAILURE if the tag value does not match. +// + + +VOID +SYMCRYPT_CALL +SymCryptGcmSelftest(void); +// +// Self test for GCM cipher mode +// + + +//========================================================================== +// SESSION BASED APIs +//========================================================================== + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionSenderInit( + _Inout_ PSYMCRYPT_SESSION pSession, + UINT32 senderId, + UINT32 flags ); +// +// Initialize an encryption session object. The default nonce size of 12B is used - 8B are provided +// by message number, 4B by senderId. +// - pSession: Pointer to an uninitialized session object. +// - senderId: The id of the sender (must be unique for each user of a given key). +// Callers should either choose a senderId which is specific to the sender, or +// at least to the software and role in a system in which a key is being used. +// Two encryption sessions using the same key and senderId leads to catastrophic loss of security. +// - No flags are specified for this function +// +// Remarks: +// On some platforms use of a session object requires use of a mutex. 
On those platforms this +// function will call SymCryptCallbackAllocateMutexFastInproc and may indicate failure by returning +// SYMCRYPT_MEMORY_ALLOCATION_FAILURE if a mutex object cannot be created. +// Callers must call SymCryptSessionDestroy to ensure any associated allocated mutex object is freed +// before calling another Init function on the SYMCRYPT_SESSION object, and instead of directly +// calling SymCryptWipeKnownSize on the object. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionReceiverInit( + _Inout_ PSYMCRYPT_SESSION pSession, + UINT32 senderId, + UINT32 flags ); +// +// Initialize a decryption session object. The default nonce size of 12B is used - 8B are provided +// by message number, 4B by senderId. +// - pSession: Pointer to an uninitialized session object. +// - senderId: The id of the sender (must be unique for each user of a given key). +// Callers should either choose a senderId which is specific to the sender, or +// at least to the software and role in a system in which a key is being used. +// The id used in a decryption session must be the same as the id used in the corresponding +// encryption session (i.e. sender and receiver must agree upon a senderId for their +// communication session) +// - No flags are specified for this function +// +// Remarks: +// On some platforms use of a session object requires use of a mutex. On those platforms this +// function will call SymCryptCallbackAllocateMutexFastInproc and may indicate failure by returning +// SYMCRYPT_MEMORY_ALLOCATION_FAILURE if a mutex object cannot be created. +// Callers must call SymCryptSessionDestroy to ensure any associated allocated mutex object is freed +// before calling another Init function on the SYMCRYPT_SESSION object, and instead of directly +// calling SymCryptWipeKnownSize on the object.
+// + +VOID +SYMCRYPT_CALL +SymCryptSessionDestroy( + _Inout_ PSYMCRYPT_SESSION pSession ); +// +// Clear session object and free any data associated with the object (i.e. allocated locks) +// After this call the memory used for pSession is uninitialized and can be used for other purposes. +// Note that it is not safe to just wipe the memory of the session object as the session +// object contains pointers to other allocations. +// The only way to safely destroy a session is to use this function. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionGcmEncrypt( + _Inout_ PSYMCRYPT_SESSION pSession, + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag, + _Out_opt_ PUINT64 pu64MessageNumber ); +// +// Encrypt a buffer, in a series, using the block cipher in GCM mode. +// - pSession points to the session object for this series of GCM encryptions. It handles +// ensuring Nonce uniqueness across several encryption calls using the same key. The message +// number in the pSession object is atomically incremented by this call. +// If too many messages (2^64 - 2^32) have been encrypted with the same session object, +// SYMCRYPT_INVALID_ARGUMENT is returned and no encryption takes place. This should never +// occur in real use! +// - pExpandedKey points to the expanded key for GCM. +// - pbAuthData: pointer to the associated authentication data. This data is not encrypted +// but it is included in the authentication. Use NULL if not used. +// - cbAuthData: # bytes of associated authentication data. (0 if not used) +// - pbSrc: plaintext input +// - pbDst: ciphertext output. The ciphertext buffer may be identical to the plaintext +// buffer, or non-overlapping. The ciphertext is also cbData bytes long. +// - cbData: # bytes of plaintext input. 
The maximum length is 2^{36} - 32 bytes. +// - pbTag: buffer that will receive the authentication tag. +// - cbTag: size of tag. cbTag must be one of {12, 13, 14, 15, 16} per SP800-38D +// section 5.2.1.2. The optional shorter tag sizes (4 and 8) are not supported. +// - pu64MessageNumber: Optional message number output for this encryption. A unique message +// number is extracted from the pSession object, this output is set to the value used in +// the encryption. The first message number generated in a session will have the value 1, +// and subsequent message numbers will be taken by atomically incrementing the counter. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionGcmDecrypt( + _Inout_ PSYMCRYPT_SESSION pSession, + UINT64 messageNumber, + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ); +// +// Decrypt a buffer, in a series, using the block cipher in GCM mode. +// - pSession points to the session object for this series of GCM decryptions. It handles +// ensuring Nonce uniqueness across several decryption calls using the same key, particularly +// ensuring there are no replays. +// - messageNumber: The message number to be used for this decryption, forming part of the Nonce. +// When performing decryption in a session, it is guaranteed that no 2 decryptions using the +// same session and same message number can succeed. This is to provide protection against +// replay attacks. +// In order to provide this guarantee, pSession tracks a window of used message numbers +// preceding the largest messageNumber successfully used so far in the decryption session. 
+// A SYMCRYPT_SESSION_REPLAY_FAILURE error will be returned if either: +// a) messageNumber is less than the smallest message number that can be tracked for replays +// b) messageNumber is within the window that can be tracked for replays, and the message +// number is marked as already having been used in a successful decryption in this session +// In either case, the destination buffer is wiped. +// See SymCryptSessionGcmEncrypt for a description of the other parameters. This function decrypts +// rather than encrypts, and as a result the pbTag parameter is read rather than filled. +// If the tag value is not correct the SYMCRYPT_AUTHENTICATION_FAILURE error is returned and the +// pbDst buffer is wiped of any plaintext. +// Note: While checking the authentication the purported plaintext is stored in pbDst. It is not safe to reveal +// purported plaintext when the authentication has not been checked. (Doing so would reveal key stream information +// that can be used to decrypt any message encrypted with the same nonce value.) Thus, users should be careful +// to not reveal the pbDst buffer until this function returns (e.g. through other threads or sharing memory). +// + + +//========================================================================== +// STREAM CIPHERS +//========================================================================== + +//////////////////////////////////////////////////////////////////////////// +// RC4 +// +// The RC4 stream cipher +// +// Use of RC4 is not recommended. +// +// The RC4 implementation makes extensive use of table lookups to implement the S-boxes of the algorithm. +// This violates our current crypto implementation guidelines and opens up a possible side-channel attack +// through information leakage via the memory caching system of the CPU. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRc4Init( + _Out_ PSYMCRYPT_RC4_STATE pState, + _In_reads_( cbKey ) PCBYTE pbKey, + _In_ SIZE_T cbKey ); +// +// Initialize an RC4 encryption/decryption state. +// WARNING: the most common error in using RC4 is to use the same key to encrypt two different pieces of data. +// This is insecure and should never be done; you need a unique key for each data element that is encrypted. +// + +VOID +SYMCRYPT_CALL +SymCryptRc4Crypt( + _Inout_ PSYMCRYPT_RC4_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + _In_ SIZE_T cbData ); +// +// Encrypt or Decrypt data using the RC4 state. Note that the RC4 state is updated and therefore this +// function cannot be used by two threads simultaneously using the same state object. +// + +VOID +SYMCRYPT_CALL +SymCryptRc4Selftest(void); + + +// +// ChaCha20 +// +// The ChaCha20 stream cipher is specified in RFC 7539 and referenced by RFC 7905 +// which specifies the ChaCha20-Poly1305 TLS cipher suite. +// +// ChaCha is a random-access stream cipher. It is possible to jump to any part of +// the key stream and start en/decrypting there. +// We support this by allowing the caller to select the position in the key stream +// to use. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptChaCha20Init( + _Out_ PSYMCRYPT_CHACHA20_STATE pState, + _In_reads_( cbKey ) PCBYTE pbKey, + _In_ SIZE_T cbKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + UINT64 offset ); +// +// Initialize a ChaCha20 en/decryption state. +// Key must be 32 bytes +// Nonce must be 12 bytes +// offset is the position into the key stream that the next encrypt/decrypt +// operation will use. Requirement: 0 <= offset < 2^38 +// The ChaCha documentation is formulated in terms of a 'counter' or 'initial counter'. +// Callers can set offset = 64 * <counter> to achieve the same results. +// +// An error is returned only for invalid key or nonce sizes. 
+// +// A single (key,nonce) pair defines a key stream of 256 GB. +// Any part of that key stream can be used to encrypt a message, or part of a +// message. +// Note that it is critical that each key stream byte is used only once; thus +// callers have to ensure that for any key, each nonce is used at most once for +// a message, and messages cannot use any part of the 256 GB key stream more than +// once. +// + +VOID +SYMCRYPT_CALL +SymCryptChaCha20SetOffset( + _Inout_ PSYMCRYPT_CHACHA20_STATE pState, + UINT64 offset ); +// +// Specify the offset into the key stream where the next encrypt/decrypt operation +// will start. +// Requirement: 0 <= offset < 2^38 +// + +VOID +SYMCRYPT_CALL +SymCryptChaCha20Crypt( + _Inout_ PSYMCRYPT_CHACHA20_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// Encrypt or Decrypt data using the CHACHA20 state. +// The Src data is xorred with the key stream generated from the state, and the result stored +// in the Dst buffer. The Src and Dst buffer can be identical or non-overlapping; partial overlaps +// are not supported. +// As the state is updated two threads cannot en/decrypt with the same state at the same time. +// The key stream used is the one generated from the key and nonce, starting at the specified +// offset into the key stream. This function updates the offset of the state by adding cbData to +// it so that the next call will use the next part of the key stream. +// Any attempt to use the key stream at offset >= 2^38 will result in catastrophic loss of security. 
+// + +VOID +SYMCRYPT_CALL +SymCryptChaCha20Selftest(void); + + + + +//========================================================================== +// KEY DERIVATION ALGORITHMS +//========================================================================== + +//////////////////////////////////////////////////////////////////////////// +// PBKDF2 +// +// Generic KDF parameter handling: +// - Generic parameter is passed in the Salt input; +// - iterationCnt is set to 1. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptPbkdf2ExpandKey( + _Out_ PSYMCRYPT_PBKDF2_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptPbkdf2Derive( + _In_ PCSYMCRYPT_PBKDF2_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + UINT64 iterationCnt, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptPbkdf2( + PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + UINT64 iterationCnt, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +// +// Because the self-test pulls in the associated MAC function, +// we have several self-tests; each of which tests the PBKDF2 implementation +// using the specified MAC function. +// This allows a FIPS module to run the self-test with the MAC function it already +// uses internally. +// +// More can be added when needed. +// + +VOID +SYMCRYPT_CALL +SymCryptPbkdf2_HmacSha1SelfTest(void); + +VOID +SYMCRYPT_CALL +SymCryptPbkdf2_HmacSha256SelfTest(void); + +//////////////////////////////////////////////////////////////////////////// +// SP800-108 Counter mode +// +// Generic KDF parameter handling: +// Generic parameter contains the concatenation of the Label, a zero byte, and the Context. 
+// To pass a generic parameter do the following: +// - pbLabel = NULL +// - cbLabel = (SIZE_T) -1; +// - pbContext/cbContext = generic parameter +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSp800_108ExpandKey( + _Out_ PSYMCRYPT_SP800_108_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSp800_108Derive( + _In_ PCSYMCRYPT_SP800_108_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbLabel) PCBYTE pbLabel, + SIZE_T cbLabel, + _In_reads_opt_(cbContext) PCBYTE pbContext, + SIZE_T cbContext, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSp800_108( + PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_opt_(cbLabel) PCBYTE pbLabel, + SIZE_T cbLabel, + _In_reads_opt_(cbContext) PCBYTE pbContext, + SIZE_T cbContext, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptSp800_108_HmacSha1SelfTest(void); + +VOID +SYMCRYPT_CALL +SymCryptSp800_108_HmacSha256SelfTest(void); + +VOID +SYMCRYPT_CALL +SymCryptSp800_108_HmacSha384SelfTest(void); + +VOID +SYMCRYPT_CALL +SymCryptSp800_108_HmacSha512SelfTest(void); + +//////////////////////////////////////////////////////////////////////////// +// TLS Key Derivation PRFs +// +// PRFs used in the key derivation functions of the TLS protocol, versions +// 1.0, 1.1, and 1.2. These are defined in RFC 2246, 4346, and 5246, +// respectively. +// Note: The PRFs for versions 1.0 and 1.1 are identical. +// + +// Maximum sizes (in bytes) for the label and the seed inputs. See the +// above RFCs 2246, 4346, and 5246 for more details. 
+#define SYMCRYPT_TLS_MAX_LABEL_SIZE 256 +#define SYMCRYPT_TLS_MAX_SEED_SIZE 256 + +// +// Version 1.0/1.1 +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsPrf1_1ExpandKey( + _Out_ PSYMCRYPT_TLSPRF1_1_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsPrf1_1Derive( + _In_ PCSYMCRYPT_TLSPRF1_1_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbLabel) PCBYTE pbLabel, + _In_ SIZE_T cbLabel, // Up to SYMCRYPT_TLS_MAX_LABEL_SIZE + _In_reads_(cbSeed) PCBYTE pbSeed, + _In_ SIZE_T cbSeed, // Up to SYMCRYPT_TLS_MAX_SEED_SIZE + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsPrf1_1( + _In_reads_(cbKey) PCBYTE pbKey, + _In_ SIZE_T cbKey, + _In_reads_opt_(cbLabel) PCBYTE pbLabel, + _In_ SIZE_T cbLabel, + _In_reads_(cbSeed) PCBYTE pbSeed, + _In_ SIZE_T cbSeed, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptTlsPrf1_1SelfTest(void); + +// +// Version 1.2 +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsPrf1_2ExpandKey( + _Out_ PSYMCRYPT_TLSPRF1_2_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsPrf1_2Derive( + _In_ PCSYMCRYPT_TLSPRF1_2_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbLabel) PCBYTE pbLabel, + _In_ SIZE_T cbLabel, // Up to SYMCRYPT_TLS_MAX_LABEL_SIZE + _In_reads_(cbSeed) PCBYTE pbSeed, + _In_ SIZE_T cbSeed, // Up to SYMCRYPT_TLS_MAX_SEED_SIZE + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsPrf1_2( + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbKey) PCBYTE pbKey, + _In_ SIZE_T cbKey, + _In_reads_opt_(cbLabel) PCBYTE pbLabel, + _In_ SIZE_T cbLabel, + _In_reads_(cbSeed) PCBYTE pbSeed, + _In_ SIZE_T cbSeed, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptTlsPrf1_2SelfTest(void); + + 
+//////////////////////////////////////////////////////////////////////////// +// SSH-KDF as specified in RFC 4253 Section 7.2. +// + + +// Labels defined in RFC 4253 +#define SYMCRYPT_SSHKDF_IV_CLIENT_TO_SERVER 0x41 // 'A' +#define SYMCRYPT_SSHKDF_IV_SERVER_TO_CLIENT 0x42 // 'B' +#define SYMCRYPT_SSHKDF_ENCRYPTION_KEY_CLIENT_TO_SERVER 0x43 // 'C' +#define SYMCRYPT_SSHKDF_ENCRYPTION_KEY_SERVER_TO_CLIENT 0x44 // 'D' +#define SYMCRYPT_SSHKDF_INTEGRITY_KEY_CLIENT_TO_SERVER 0x45 // 'E' +#define SYMCRYPT_SSHKDF_INTEGRITY_KEY_SERVER_TO_CLIENT 0x46 // 'F' + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSshKdfExpandKey( + _Out_ PSYMCRYPT_SSHKDF_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_HASH pHashFunc, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey); +// +// Process the key using the specified hash function and store the result in +// SYMCRYPT_SSHKDF_EXPANDED_KEY structure. Once the key is expanded, +// SymCryptSshKdfDerive can be called multiple times to generate keys for +// different uses/labels. +// +// After all the keys are derived from a particular "shared secret" key, +// SYMCRYPT_SSHKDF_EXPANDED_KEY structure must be wiped. +// +// Parameters: +// - pExpandedKey : Pointer to a SYMCRYPT_SSHKDF_EXPANDED_KEY structure that +// will contain the expanded key after the function returns. +// - pHashFunc : Hash function that will be used in the key derivation. +// This function is saved in SYMCRYPT_SSHKDF_EXPANDED_KEY +// so that it is also used by the SymCryptSshKdfDerive function. +// - pbKey, cbKey : Buffer containing the secret key for the KDF. 
+// +// Returns SYMCRYPT_NO_ERROR +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSshKdfDerive( + _In_ PCSYMCRYPT_SSHKDF_EXPANDED_KEY pExpandedKey, + _In_reads_(cbHashValue) PCBYTE pbHashValue, + SIZE_T cbHashValue, + BYTE label, + _In_reads_(cbSessionId) PCBYTE pbSessionId, + SIZE_T cbSessionId, + _Inout_updates_(cbOutput) PBYTE pbOutput, + SIZE_T cbOutput); +// +// Derive keys using the expanded key that was initialized with SymCryptSshKdfExpandKey +// along with other inputs. This function can be called consecutively with varying label +// values to generate keys for different purposes as defined in the RFC. +// +// Parameters: +// - pExpandedKey : Pointer to a SYMCRYPT_SSHKDF_EXPANDED_KEY structure that is +// initialized by a prior call to SymCryptSshKdfExpandKey. +// Must be wiped when SymCryptSshKdfDerive is not going to be called +// again with the same expanded key. +// - pbHashValue, cbHashValue : Buffer pointing to "exchange hash" value. cbHashValue must be equal +// to the output size of the hash function passed to SymCryptSshKdfExpandKey. +// - label : Label value used to indicate the type of the derived key. +// - pbSessionId, cbSessionId : Buffer pointing to the session identifier. cbSessionId must be equal +// to the output size of the hash function passed to SymCryptSshKdfExpandKey. +// - pbOutput, cbOutput : Buffer to store the derived key. Exactly cbOutput bytes of output will be generated. +// +// Returns SYMCRYPT_NO_ERROR +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSshKdf( + _In_ PCSYMCRYPT_HASH pHashFunc, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_(cbHashValue) PCBYTE pbHashValue, + SIZE_T cbHashValue, + BYTE label, + _In_reads_(cbSessionId) PCBYTE pbSessionId, + SIZE_T cbSessionId, + _Out_writes_(cbOutput) PBYTE pbOutput, + SIZE_T cbOutput); +// +// This function is a wrapper for using SymCryptSshKdfExpandKey followed by SymCryptSshKdfDerive +// in order to produce SSH-KDF output. 
+// +// All of the function arguments are forwarded to the SymCryptSshKdfExpandKey and SymCryptSshKdfDerive +// functions, hence the documentation on those functions applies here as well. +// + + +VOID +SYMCRYPT_CALL +SymCryptSshKdfSha256SelfTest(void); + +VOID +SYMCRYPT_CALL +SymCryptSshKdfSha512SelfTest(void); + + +//////////////////////////////////////////////////////////////////////////// +// SRTP-KDF as specified in RFC 3711 Section 4.3.1. +// + + +// Labels defined in RFC 3711 +#define SYMCRYPT_SRTP_ENCRYPTION_KEY 0x00 +#define SYMCRYPT_SRTP_AUTHENTICATION_KEY 0x01 +#define SYMCRYPT_SRTP_SALTING_KEY 0x02 +#define SYMCRYPT_SRTCP_ENCRYPTION_KEY 0x03 +#define SYMCRYPT_SRTCP_AUTHENTICATION_KEY 0x04 +#define SYMCRYPT_SRTCP_SALTING_KEY 0x05 + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSrtpKdfExpandKey( + _Out_ PSYMCRYPT_SRTPKDF_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey); +// +// Process the key and store the result in the SYMCRYPT_SRTPKDF_EXPANDED_KEY structure. +// Once the key is expanded, SymCryptSrtpKdfDerive can be called multiple times to +// generate keys for different uses/labels. +// +// After all the keys are derived from a particular "shared secret" key, the +// SYMCRYPT_SRTPKDF_EXPANDED_KEY structure must be wiped. +// +// Parameters: +// - pExpandedKey : Pointer to a SYMCRYPT_SRTPKDF_EXPANDED_KEY structure that +// will contain the expanded key after the function returns. +// - pbKey, cbKey : Buffer containing the secret key for the KDF. cbKey must be +// a valid AES key size (16, 24, or 32 bytes).
+// +// Returns: +// SYMCRYPT_WRONG_KEY_SIZE : If cbKey is not a valid AES key size +// SYMCRYPT_NO_ERROR : On success +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSrtpKdfDerive( + _In_ PCSYMCRYPT_SRTPKDF_EXPANDED_KEY pExpandedKey, + _In_reads_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + UINT32 uKeyDerivationRate, + UINT64 uIndex, + UINT32 uIndexWidth, + BYTE label, + _Out_writes_(cbOutput) PBYTE pbOutput, + SIZE_T cbOutput); +// +// Derive keys using the expanded key that was initialized with SymCryptSrtpKdfExpandKey +// along with other inputs. This function can be called consecutively with varying label +// values to generate keys for different purposes as defined in the RFC. +// +// Parameters: +// - pExpandedKey : Pointer to a SYMCRYPT_SRTPKDF_EXPANDED_KEY structure that is +// initialized by a prior call to SymCryptSrtpKdfExpandKey. +// Must be wiped when SymCryptSrtpKdfDerive is not going to be called +// again with the same expanded key. +// - pbSalt, cbSalt : Buffer pointing to the salt value. cbSalt must always be 14 (112-bits). +// - uKeyDerivationRate : Key derivation rate; must be zero or 2^i for 0 <= i <= 24. +// - uIndex : Denotes an SRTP index value when label is 0x00, 0x01, or 0x02, otherwise +// denotes an SRTCP index value. +// - uIndexWidth : Denotes how wide uIndex value is. Must be one of 0, 32, or 48. By default, +// (when uIndexWidth = 0) uIndex is treated as 48-bits. +// RFC 3711 initially defined SRTCP indices to be 32-bit values. It was updated +// to be 48-bits by Errata ID 3712. SRTP index values are defined to be 48-bits. +// - label : Label value used to indicate the type of the derived key. +// - pbOutput, cbOutput : Buffer to store the derived key. Exactly cbOutput bytes of output will be generated. +// +// Returns: +// SYMCRYPT_INVALID_ARGUMENT : If cbSalt is not 14 bytes, or uKeyDerivationRate is invalid. +// SYMCRYPT_NO_ERROR : On success.
+// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSrtpKdf( + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + UINT32 uKeyDerivationRate, + UINT64 uIndex, + UINT32 uIndexWidth, + BYTE label, + _Out_writes_(cbOutput) PBYTE pbOutput, + SIZE_T cbOutput); +// +// This function is a wrapper for using SymCryptSrtpKdfExpandKey followed by SymCryptSrtpKdfDerive +// in order to produce SRTP-KDF output. +// +// All of the function arguments are forwarded to the SymCryptSrtpKdfExpandKey and SymCryptSrtpKdfDerive +// functions, hence the documentation on those functions applies here as well. +// + + +VOID +SYMCRYPT_CALL +SymCryptSrtpKdfSelfTest(void); + + +//////////////////////////////////////////////////////////////////////////// +// HKDF +// +// PRF used in the key derivation functions of the TLS protocol, version +// 1.3. It is defined in RFC 5869. +// +// The SymCrypt ExtractPrk function corresponds to the "HKDF-Extract" function +// of RFC 5869, while the SymCrypt PrkExpandKey and Derive functions +// correspond to the "HKDF-Expand" function of the RFC. +// +// SymCryptHkdfExtractPrk takes as inputs the MAC algorithm, the IKM (input +// keying material), and the optional salt. It executes the full "HKDF-Extract" +// function to produce the PRK (pseudorandom key). +// +// SymCryptHkdfPrkExpandKey takes as inputs just the MAC algorithm and the PRK. +// It produces the final (MAC) key to be used by the "HKDF-Expand" function. +// +// SymCryptHkdfExpandKey performs SymCryptHkdfExtractPrk followed by +// SymCryptHkdfPrkExpandKey to produce the final (MAC) key to be used by the +// "HKDF-Expand" function, without exposing the PRK to the caller. +// +// SymCryptHkdfDerive takes as input the final MAC key and the optional info. It +// performs the rest of the "HKDF-Expand" function to produce the HKDF result.
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHkdfExpandKey( + _Out_ PSYMCRYPT_HKDF_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbIkm) PCBYTE pbIkm, + SIZE_T cbIkm, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHkdfExtractPrk( + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbIkm) PCBYTE pbIkm, + SIZE_T cbIkm, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + _Out_writes_(cbPrk) PBYTE pbPrk, + SIZE_T cbPrk ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHkdfPrkExpandKey( + _Out_ PSYMCRYPT_HKDF_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbPrk) PCBYTE pbPrk, + SIZE_T cbPrk ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHkdfDerive( + _In_ PCSYMCRYPT_HKDF_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbInfo) PCBYTE pbInfo, + SIZE_T cbInfo, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHkdf( + PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbIkm) PCBYTE pbIkm, + SIZE_T cbIkm, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + _In_reads_opt_(cbInfo) PCBYTE pbInfo, + SIZE_T cbInfo, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); + +VOID +SYMCRYPT_CALL +SymCryptHkdfSelfTest(void); + +//////////////////////////////////////////////////////////////////////////// +// SSKDF +// +// Single-Step KDF as specified in SP800-56C section 4. +// +// SSKDF requires an auxiliary function H. This can be an approved hash function, +// HMAC with an approved hash function, or KMAC. The approved hash functions +// are listed in SP800-56C section 7. +// +// A salt value may be optionally provided if either HMAC or KMAC is used for H. +// When no salt is provided, an all-zero default salt is used instead. For HMAC, +// the default salt is the length of an input block of the HMAC's hash function. +// For KMAC128, the default salt is 164 bytes. For KMAC256, the default salt is 132 bytes. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSskdfMacExpandSalt( + _Out_ PSYMCRYPT_SSKDF_MAC_EXPANDED_SALT pExpandedSalt, + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt); +// +// Initializes *pExpandedSalt with the macAlgorithm, and optionally the salt. Used +// for SSKDF when H is a MAC function. After calling SymCryptSskdfMacExpandSalt, +// SymCryptSskdfMacDerive can be called multiple times to generate keys for different +// uses, fixed infos, and shared secrets. For multiple KDFs using the same MAC and salt, +// calling SymCryptSskdfMacExpandSalt once and SymCryptSskdfMacDerive multiple times +// is more efficient than calling SymCryptSskdfMac multiple times. +// +// The expanded salt contains no secrets and does not need to be wiped. +// +// Parameters: +// - pExpandedSalt : Pointer to a SYMCRYPT_SSKDF_MAC_EXPANDED_SALT structure that +// will contain the expanded salt after the function returns. +// - macAlgorithm : MAC algorithm that will be used in the key derivation. +// This function is saved in SYMCRYPT_SSKDF_MAC_EXPANDED_SALT. +// - pbSalt, cbSalt : Buffer containing the salt for the KDF. cbSalt must be a valid +// key size for the MAC algorithm. If pbSalt is NULL, the default +// all zero-byte salt is used. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSskdfMacDerive( + _In_ PCSYMCRYPT_SSKDF_MAC_EXPANDED_SALT pExpandedSalt, + SIZE_T cbMacOutputSize, + _In_reads_(cbSecret) PCBYTE pbSecret, + SIZE_T cbSecret, + _In_reads_opt_(cbInfo) PCBYTE pbInfo, + SIZE_T cbInfo, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); +// +// Derive keys using the expanded salt that was initialized with SymCryptSskdfMacExpandSalt +// along with other inputs. This function can be called consecutively with varying fixed infos +// and shared secrets to generate keys for different purposes as defined in the SP800-56C. +// The same pExpandedSalt can be used simultaneously by multiple threads. 
+// +// Parameters: +// - pExpandedSalt : Pointer to a SYMCRYPT_SSKDF_MAC_EXPANDED_SALT structure that is +// initialized by a prior call to SymCryptSskdfMacExpandSalt. +// - cbMacOutputSize : Output size used by the MAC algorithm for intermediate computations. Must not be +// greater than 64 bytes. Set to 0 for MACs that don't support variable output sizes, +// or to use the default output size. The default output size when KMAC is used is cbResult. +// - pbSecret, cbSecret : Buffer containing the shared secret. +// - pbInfo, cbInfo : Buffer containing the fixed info. +// - pbResult, cbResult : Buffer to store the derived key. Exactly cbResult bytes of output will be generated. +// Must not exceed 2^{32} - 1 times the result size of the MAC algorithm. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSskdfMac( + _In_ PCSYMCRYPT_MAC macAlgorithm, + SIZE_T cbMacOutputSize, + _In_reads_(cbSecret) PCBYTE pbSecret, + SIZE_T cbSecret, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + _In_reads_opt_(cbInfo) PCBYTE pbInfo, + SIZE_T cbInfo, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); +// +// This function is a wrapper for using SymCryptSskdfMacExpandSalt followed by SymCryptSskdfMacDerive +// in order to produce SSKDF output. +// +// All of the function arguments are forwarded to SymCryptSskdfMacExpandSalt and SymCryptSskdfMacDerive +// functions, hence the documentation on those functions apply here as well. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSskdfHash( + _In_ PCSYMCRYPT_HASH hashAlgorithm, + SIZE_T cbHashOutputSize, + _In_reads_(cbSecret) PCBYTE pbSecret, + SIZE_T cbSecret, + _In_reads_opt_(cbInfo) PCBYTE pbInfo, + SIZE_T cbInfo, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult); +// +// Derive keys using the specified hash algorithm as H. +// +// Parameters: +// - hashAlgorithm : Hash algorithm that will be used in the key derivation. +// - cbHashOutputSize : Output size used by the hash algorithm for intermediate computations. 
+// Set to 0 for hashes that don't support variable output sizes, or to use +// the default output size. Currently, no allowed hash algorithms support +// variable output sizes, so this should always be set to 0. +// - pbSecret, cbSecret : Buffer containing the shared secret. +// - pbInfo, cbInfo : Buffer containing the fixed info. +// - pbResult, cbResult : Buffer to store the derived key. Exactly cbResult bytes of output will be generated. +// Must not exceed 2^{32} - 1 times the result size of hashAlgorithm. +// + +VOID +SYMCRYPT_CALL +SymCryptSskdfSelfTest(void); + +//========================================================================== +// RNG ALGORITHMS +//========================================================================== + +//////////////////////////////////////////////////////////////////////////// +// AES-CTR-DRBG +// +// This is an implementation of AES-CTR_DRBG as specified in SP 800-90. +// It always uses a 256-bit security strength. +// +// Note: This RNG is NOT compliant with FIPS 140-2 as it lacks the continuous +// self test required by FIPS 140-2. See the AES-FIPS RNG algorithm below. +// +// SYMCRYPT_RNG_AES_STATE +// State of an AES-CTR_DRBG instance. +// + +#define SYMCRYPT_RNG_AES_MIN_INSTANTIATE_SIZE (32 + 16) +#define SYMCRYPT_RNG_AES_MIN_RESEED_SIZE (32) +#define SYMCRYPT_RNG_AES_MAX_SEED_SIZE (256) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesInstantiate( + _Out_ PSYMCRYPT_RNG_AES_STATE pRngState, + _In_reads_(cbSeedMaterial) PCBYTE pcbSeedMaterial, + + _In_range_(SYMCRYPT_RNG_AES_MIN_INSTANTIATE_SIZE, SYMCRYPT_RNG_AES_MAX_SEED_SIZE) + SIZE_T cbSeedMaterial ); +// +// Initialize a new SYMCRYPT_RNG_AES_STATE, and seed it with the seed material. +// +// 'Instantiate' is the SP800-90 terminology. +// The seed material must be at least SYMCRYPT_RNG_AES_MIN_INSTANTIATE_SIZE bytes, +// and at most SYMCRYPT_RNG_AES_MAX_SEED_SIZE bytes. 
+// +// This implementation always uses 256-bit security strength, and +// does not support 'prediction resistance' as defined in SP 800-90. +// +// SP 800-90 specifies three inputs to the instantiation: +// - entropy +// - nonce +// - personalization string +// This function takes only a single input, which is the concatenation of these three: +// seed material := entropy | nonce | personalization string +// +// The following are the requirements on the three inputs: +// Entropy: must have at least 256 bits of entropy +// Nonce: must either be a random value with 128-bits of entropy, or a value that does not +// repeat with a probability of more than 2^{-128}. +// Together these requirements imply that cbSeedMaterial should be at least +// SYMCRYPT_RNG_AES_MIN_INSTANTIATE_SIZE +// +// This function only returns an error if the cbSeedMaterial value is out of range. +// + +VOID +SYMCRYPT_CALL +SymCryptRngAesGenerate( + _Inout_ PSYMCRYPT_RNG_AES_STATE pRngState, + _Out_writes_(cbRandom) PBYTE pbRandom, + SIZE_T cbRandom ); +// +// Generate random output from the state. +// +// Callers do not need to limit themselves to requests of 64 kB or less; +// large requests are split internally to follow the request size limitations of SP 800-90. +// +// SP 800-90 also requires a limit on the # generate calls that can be done between reseeds. +// For AES-CTR_DRBG this limit is 2^48, which means it is all but impossible to hit this limit. +// If the caller were to succeed, the 2^48'th call will result in a fatal error. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesReseed( + _Inout_ PSYMCRYPT_RNG_AES_STATE pRngState, + _In_reads_(cbSeedMaterial) PCBYTE pcbSeedMaterial, + + _In_range_(SYMCRYPT_RNG_AES_MIN_RESEED_SIZE, SYMCRYPT_RNG_AES_MAX_SEED_SIZE) + SIZE_T cbSeedMaterial ); +// +// Reseed the PRNG state. 
+// +// The seed material consists of the concatenation of the following SP800-90 fields: +// - entropy +// - additional input +// +// The entropy input should have at least 256 bits of entropy. +// This function only returns an error if the cbSeedMaterial value is out of range. +// + +VOID +SYMCRYPT_CALL +SymCryptRngAesUninstantiate( + _Inout_ PSYMCRYPT_RNG_AES_STATE pRngState ); +// +// Uninstantiate (clean up) the PRNG state +// + +VOID +SYMCRYPT_CALL +SymCryptRngAesInstantiateSelftest(void); +// +// For FIPS-certified modules, this function should be called before every instantiation. +// If multiple DRBGs are instantiated 'in quick succession', a single self-test is sufficient +// (see SP 800-90 11.3.2). +// + + +VOID +SYMCRYPT_CALL +SymCryptRngAesReseedSelftest(void); +// +// FIPS-certified modules should call this function before every call to the reseed function. +// + +VOID +SYMCRYPT_CALL +SymCryptRngAesGenerateSelftest(void); +// +// FIPS-certified modules should call this function at least once on startup, and whenever +// they want to re-test the generate function. +// + +//////////////////////////////////////////////////////////////////////////// +// AES-CTR-DRBG with FIPS 140-2 continuous self-test +// +// This is a straightforward wrapper around the AES-CTR-DRBG implementation +// that adds the FIPS 140-2 continuous self-test. +// At the moment, it looks like this test will not be present in FIPS 140-3 so +// this RNG will be dropped when FIPS 140-3 comes out. +// The self-test requirements are met by calling the selftest functions of the +// AES-CTR_DRBG implementation directly. +// +// These functions are functionally equivalent to the ones for AES-CTR_DRBG. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesFips140_2Instantiate( + _Out_ PSYMCRYPT_RNG_AES_FIPS140_2_STATE pRngState, + _In_reads_(cbSeedMaterial) PCBYTE pcbSeedMaterial, + + _In_range_(SYMCRYPT_RNG_AES_MIN_INSTANTIATE_SIZE, SYMCRYPT_RNG_AES_MAX_SEED_SIZE) + SIZE_T cbSeedMaterial ); + +VOID +SYMCRYPT_CALL +SymCryptRngAesFips140_2Generate( + _Inout_ PSYMCRYPT_RNG_AES_FIPS140_2_STATE pRngState, + _Out_writes_(cbRandom) PBYTE pbRandom, + SIZE_T cbRandom ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesFips140_2Reseed( + _Inout_ PSYMCRYPT_RNG_AES_FIPS140_2_STATE pRngState, + _In_reads_(cbSeedMaterial) PCBYTE pcbSeedMaterial, + + _In_range_(SYMCRYPT_RNG_AES_MIN_RESEED_SIZE, SYMCRYPT_RNG_AES_MAX_SEED_SIZE) + SIZE_T cbSeedMaterial ); + +VOID +SYMCRYPT_CALL +SymCryptRngAesFips140_2Uninstantiate( + _Inout_ PSYMCRYPT_RNG_AES_FIPS140_2_STATE pRngState ); + +//////////////////////////////////////////////////////////////////////////////////////////// +// +// Internal RNG functions +// +// To satisfy FIPS 140-3 and SP 800-90B, certain modules of SymCrypt may set up internal +// RNG state(s) to keep random bit generation behind the module's FIPS boundary. +// These functions allow the caller to get random bits and provide entropy, respectively, +// to SymCrypt's internal RNG state(s). +// Implementation is module dependent, and these functions may not be defined +// for certain modules. Check before using. +// + +VOID +SYMCRYPT_CALL +SymCryptRandom( + _Out_writes_(cbRandom) PBYTE pbRandom, + SIZE_T cbRandom ); +// Fills pbRandom with cbRandom random bytes + +VOID +SYMCRYPT_CALL +SymCryptProvideEntropy( + _In_reads_(cbEntropy) PCBYTE pbEntropy, + SIZE_T cbEntropy ); +// Mixes pbEntropy into the internal RNG state. 
There may be module-specific limits on +// cbEntropy - check module before use + + +//////////////////////////////////////////////////////////////////////////////////////////// +// +// RdRand support +// These functions provide access to the RdRand random number generator in +// the latest Intel CPUs. +// The DRBG that underlies the RdRand instruction is limited to 128-bit security. +// The seed for each consecutive 8 kB of data can be recovered in 2^128 work. +// Therefore, we allow for multiple blocks of 8 kB to be gathered in an attempt to +// extract 256-bit security from the hardware. +// In general, to achieve N*128 bits of security, you should use a buffer of +// (N+1)*SYMCRYPT_RDRAND_RESEED_SIZE bytes. +// + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +// The RdRand instruction reseeds its internal DRBG every 8 kB (or faster) +#define SYMCRYPT_RDRAND_RESEED_SIZE (1<<13) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdrandStatus(void); +// +// Returns SYMCRYPT_NO_ERROR if RdRand is available. +// returns SYMCRYPT_NOT_IMPLEMENTED if RdRand is not available. +// Note: the library must be initialized before you call this function. +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdrandGetBytes( + _Out_writes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _Out_writes_( SYMCRYPT_SHA512_RESULT_SIZE ) PBYTE pbResult ); +// +// Gets cbBuffer bytes from the RdRand instruction and hashes them to the pbResult buffer. +// pbBuffer points to a scratch buffer that is used internally, but wiped upon exit. +// cbBuffer must be a multiple of 16. +// Fatal error if SymCryptRdrandStatus indicates that Rdrand is not available. +// Returns an error if the RdRand instruction failed consistently. +// Note: SymCrypt only checks whether RdRand self-reports as failing. SymCrypt does NOT attempt +// to validate that the values returned in successful RdRand calls are in fact random. +// See SymCryptRdrandGet for a version that does not return an error but fatals instead. 
+// + +VOID +SYMCRYPT_CALL +SymCryptRdrandGet( + _Out_writes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _Out_writes_( SYMCRYPT_SHA512_RESULT_SIZE ) PBYTE pbResult ); +// +// Gets cbBuffer bytes from the RdRand instruction and hashes them to the pbResult buffer. +// pbBuffer points to a scratch buffer that is used internally, but wiped upon exit. +// cbBuffer must be a multiple of 16. +// Fatal error if the RdRand instruction fails. +// Note: SymCrypt only checks whether RdRand self-reports as failing. SymCrypt does NOT attempt +// to validate that the values returned in successful RdRand calls are in fact random. +// + +#endif + + +//////////////////////////////////////////////////////////////////////////////////////////// +// +// RdSeed support +// These functions provide access to the RdSeed random number generator in +// recent Intel CPUs. +// + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdseedStatus(void); +// +// Returns SYMCRYPT_NO_ERROR if RdSeed is available. +// returns SYMCRYPT_NOT_IMPLEMENTED if RdSeed is not available. +// Note: the library must be initialized before you call this function. +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdseedGetBytes( + _Out_writes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult ); +// +// Queries cbResult bytes from the Rdseed instruction and puts them in the buffer. +// The number of bytes (cbResult) must be a multiple of 16. +// Fatal error if the Rdseed instruction is not present. +// Returns an error if the Rdseed instruction fails consistently. +// Note: SymCrypt only checks whether Rdseed self-reports as failing. SymCrypt does NOT attempt +// to validate that the values returned in successful Rdseed calls are in fact random. +// See SymCryptRdseedGet for a version that does not return an error but fatals instead. 
+// + +VOID +SYMCRYPT_CALL +SymCryptRdseedGet( + _Out_writes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult ); +// +// Queries cbResult bytes from the Rdseed instruction and puts them in the buffer. +// The number of bytes (cbResult) must be a multiple of 16. +// Fatal error if the Rdseed instruction is not present, or the instruction fails consistently. +// Note: SymCrypt only checks whether Rdseed self-reports as failing. SymCrypt does NOT attempt +// to validate that the values returned in successful Rdseed calls are in fact random. +// + +#endif + +//////////////////////////////////////////////////////////////////////////////////////////// +// +// AES-XTS +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXtsAesExpandKey( + _Out_ PSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey ); +// Note that this key expansion function does not perform FIPS checks for backwards compatibility. +// Use SymCryptXtsAesExpandKeyEx for FIPS-approved XTS key expansion. + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXtsAesExpandKeyEx( + _Out_ PSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, + UINT32 flags ); +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS. +// Currently this is just checking that 2 AES keys used in XTS are non-equal. + +VOID +SYMCRYPT_CALL +SymCryptXtsAesKeyCopy( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_XTS_AES_EXPANDED_KEY pDst ); +// +// Create a copy of an expanded key +// + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncrypt( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + UINT64 tweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// Encrypt a buffer using XTS-AES and 64 bit tweak. +// - pExpandedKey points to the expanded key for XTS. 
+// - cbDataUnit: size of each data unit, must be at least 16 and cannot exceed 2^{24} bytes. Typically 512. +// - tweak: 64 bit tweak value used for the first data unit in the buffer, incremented for subsequent data units. +// - pbSrc: plaintext input +// - pbDst: ciphertext output. The ciphertext buffer may be identical to the plaintext +// buffer, or non-overlapping. The ciphertext is also cbData bytes long. +// - cbData: # bytes of plaintext input. Must be a multiple of cbDataUnit. +// +// XTS-AES works on equal-sized data units, with each data unit being uniquely encrypted using a combination of +// an integer "tweak" value and the XTS key (a pair of AES keys). A data unit typically corresponds to a sector +// size on a disk. +// +// This API encrypts a buffer consisting of several consecutive data units, which use consecutive tweak values. +// As the tweak is 64 bits, if there is an overflow of 64 bits, the value of the tweak will wrap to 0. +// +// i.e. encryption with tweak 0xffffffffffffffff for a buffer consisting of 2 data units will correspond to: +// encryption using tweak 0xffffffffffffffff for the first data unit, +// encryption using tweak 0x0000000000000000 for the second data unit +// +// Note, using cbDataUnit which is a power of 2 >= 256, will likely be more performant. +// + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecrypt( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + UINT64 tweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// Decrypt a buffer using XTS-AES and 64 bit tweak. +// See SymCryptXtsAesEncrypt for a more in depth description, everything is the same, only this decrypts rather than encrypts. 
+// + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptWith128bTweak( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbTweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// Encrypt a buffer using XTS-AES and 128 bit tweak. +// - pExpandedKey points to the expanded key for XTS. +// - cbDataUnit: size of each data unit, must be at least 16 and cannot exceed 2^{24} bytes. Typically 512. +// - pbTweak: 128 bit tweak value used for the first data unit in the buffer, incremented for subsequent data units. +// - pbSrc: plaintext input +// - pbDst: ciphertext output. The ciphertext buffer may be identical to the plaintext +// buffer, or non-overlapping. The ciphertext is also cbData bytes long. +// - cbData: # bytes of plaintext input. Must be a multiple of cbDataUnit. +// +// XTS-AES works on equal-sized data units, with each data unit being uniquely encrypted using a combination of +// an integer "tweak" value and the XTS key (a pair of AES keys). A data unit typically corresponds to a sector +// size on a disk. +// +// This API encrypts a buffer consisting of several consecutive data units, which use consecutive tweak values. +// As the tweak is 128 bits, if there is an overflow of 128 bits, the value of the tweak will wrap to 0. +// +// i.e. 
encryption with tweak 0x0000000000000000ffffffffffffffff for a buffer consisting of 2 data units will correspond to: +// encryption using tweak 0x0000000000000000ffffffffffffffff for the first data unit, +// encryption using tweak 0x00000000000000010000000000000000 for the second data unit +// but encryption with tweak 0xffffffffffffffffffffffffffffffff for a buffer consisting of 2 data units will correspond to: +// encryption using tweak 0xffffffffffffffffffffffffffffffff for the first data unit, +// encryption using tweak 0x00000000000000000000000000000000 for the second data unit +// +// Note, using cbDataUnit which is a power of 2 >= 256, will likely be more performant. +// + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptWith128bTweak( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbTweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// Decrypt a buffer using XTS-AES and 128 bit tweak. +// See SymCryptXtsAesEncryptWith128bTweak for a more in depth description, everything is the same, only this decrypts rather than encrypts. +// + +VOID +SYMCRYPT_CALL +SymCryptXtsAesSelftest(void); + +//////////////////////////////////////////////////////////////////////////////////////////// +// +// AES-KW and AES-KWP +// +// These are the AES-KW and AES-KWP algorithms per SP 800-38F. +// +// These are very slow compared to most AES modes, requiring a long serial chain of AES +// block encryption/decryptions, with a best case cost comparable to ~12x AES-CBC encryption +// for a given buffer size. In practice the cost is often higher. +// These cipher modes are not recommended. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesKwEncrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T* pcbResult ); +// +// Encrypt a buffer using AES-KW mode. +// +// - pExpandedKey points to the expanded key to use. +// - pbSrc is the plaintext source buffer. The source and destination buffers may be +// identical (in-place encryption) or non-overlapping, but they may not partially overlap. +// - cbSrc. # bytes of plaintext. This must be a multiple of 8, >=16, and <2^31. +// - pbDst is the ciphertext destination buffer. The source and destination buffers may be +// identical (in-place encryption) or non-overlapping, but they may not partially overlap. +// - cbDst. # bytes in the destination buffer. This must be >= cbSrc+8. +// - pcbResult pointer to a variable which receives the length of the ciphertext written to pbDst. +// +// Returns: +// SYMCRYPT_INVALID_ARGUMENT : If cbSrc is an invalid size +// SYMCRYPT_BUFFER_TOO_SMALL : If cbDst is not large enough +// (this can always be avoided if cbDst >= cbSrc+8) +// SYMCRYPT_MEMORY_ALLOCATION_FAILURE : If there is insufficient memory for the operation +// SYMCRYPT_NO_ERROR : On success +// +// Remarks: +// The standard allows larger plaintexts but there is no requirement to support them, we only support +// plaintext up to 2^31 bytes because it avoids complexity in handling overflow of 32b buffer sizes, and +// is larger than practically necessary. +// The output parameters (pbDst and pcbResult) are only set on success. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesKwDecrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T* pcbResult ); +// +// Decrypt a buffer using AES-KW mode. +// +// - pExpandedKey points to the expanded key to use. 
+// - pbSrc is the ciphertext source buffer. The source and destination buffers may be +// identical (in-place decryption) or non-overlapping, but they may not partially overlap. +// - cbSrc. # bytes of ciphertext. This must be a multiple of 8, >=24, and <=2^31. +// - pbDst is the plaintext destination buffer. The source and destination buffers may be +// identical (in-place decryption) or non-overlapping, but they may not partially overlap. +// - cbDst. # bytes in the destination buffer. This must be >= cbSrc-8. +// - pcbResult pointer to a variable which receives the length of the plaintext written to pbDst. +// +// Returns: +// SYMCRYPT_INVALID_ARGUMENT : If cbSrc is an invalid size +// SYMCRYPT_BUFFER_TOO_SMALL : If cbDst is not large enough +// (this can always be avoided if cbDst >= cbSrc-8) +// SYMCRYPT_AUTHENTICATION_FAILURE : If pbSrc does not decrypt successfully +// SYMCRYPT_MEMORY_ALLOCATION_FAILURE : If there is insufficient memory for the operation +// SYMCRYPT_NO_ERROR : On success +// +// Remarks: +// The standard allows larger plaintexts but there is no requirement to support them, we only support +// plaintext up to 2^31 bytes because it avoids complexity in handling overflow of 32b buffer sizes, and +// is larger than practically necessary. +// The output parameters (pbDst and pcbResult) are only set on success. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesKwpEncrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T* pcbResult ); +// +// Encrypt a buffer using AES-KWP mode. +// +// - pExpandedKey points to the expanded key to use. +// - pbSrc is the plaintext source buffer. The source and destination buffers may be +// identical (in-place encryption) or non-overlapping, but they may not partially overlap. +// - cbSrc. # bytes of plaintext. This must be >0 and <=2^31-8. 
+// - pbDst is the ciphertext destination buffer. The source and destination buffers may be +// identical (in-place encryption) or non-overlapping, but they may not partially overlap. +// - cbDst. # bytes in the destination buffer. This must be >= cbSrc + 16 - (cbSrc%8) - ((cbSrc%8)==0 ? 8 : 0) +// - pcbResult pointer to a variable which receives the length of the ciphertext written to pbDst. +// +// Returns: +// SYMCRYPT_INVALID_ARGUMENT : If cbSrc is an invalid size +// SYMCRYPT_BUFFER_TOO_SMALL : If cbDst is not large enough +// (this can always be avoided if cbDst >= cbSrc+15) +// SYMCRYPT_MEMORY_ALLOCATION_FAILURE : If there is insufficient memory for the operation +// SYMCRYPT_NO_ERROR : On success +// +// Remarks: +// The standard allows larger plaintexts but there is no requirement to support them, we only support +// plaintext up to 2^31 bytes because it avoids complexity in handling overflow of 32b buffer sizes, and +// is larger than practically necessary. +// The output parameters (pbDst and pcbResult) are only set on success. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesKwpDecrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T* pcbResult ); +// +// Decrypt a buffer using AES-KWP mode. +// +// - pExpandedKey points to the expanded key to use. +// - pbSrc is the ciphertext source buffer. The source and destination buffers may be +// identical (in-place decryption) or non-overlapping, but they may not partially overlap. +// - cbSrc. # bytes of ciphertext. This must be a multiple of 8, >=16, and <=2^31. +// - pbDst is the plaintext destination buffer. The source and destination buffers may be +// identical (in-place decryption) or non-overlapping, but they may not partially overlap. +// - cbDst. # bytes in the destination buffer. 
This must be large enough to fit the plaintext, +// a valid plaintext length is in the range [cbSrc-15, cbSrc-8]. If cbDst >= cbSrc-8 then the +// destination buffer is guaranteed to be large enough. +// - pcbResult pointer to a variable which receives the length of the plaintext written to pbDst. +// +// Returns: +// SYMCRYPT_INVALID_ARGUMENT : If cbSrc is an invalid size +// SYMCRYPT_BUFFER_TOO_SMALL : If cbDst is not large enough +// (this can always be avoided if cbDst >= cbSrc-8) +// SYMCRYPT_AUTHENTICATION_FAILURE : If pbSrc does not decrypt successfully +// SYMCRYPT_MEMORY_ALLOCATION_FAILURE : If there is insufficient memory for the operation +// SYMCRYPT_NO_ERROR : On success +// +// Remarks: +// The standard allows larger plaintexts but there is no requirement to support them, we only support +// plaintext up to 2^31 bytes because it avoids complexity in handling overflow of 32b buffer sizes, and +// is larger than practically necessary. +// The output parameters (pbDst and pcbResult) are only set on success. +// +// If we fail to decrypt due to bad data, we return SYMCRYPT_AUTHENTICATION_FAILURE in constant time with +// respect to how the decrypted data is corrupted. While there is no known attack on AES-KWP abusing +// differential timing of different failure cases, being constant time for this is cheap, so is a reasonable +// hardening measure. +// +// On success we do not attempt to hide the plaintext length from sidechannels, as this could make it hard +// for callers with known plaintext length to use precisely sized buffers to decrypt into (i.e. caller +// knows the valid plaintext is 15 bytes but the API would require caller to provide a 16 byte pbDst). It +// is expected that in any real use case the length of the plaintext would immediately be used to import the +// unwrapped key into some other piece of code - so attempting to obscure the plaintext length would not be +// of any benefit. 
+// + + +//////////////////////////////////////////////////////////////////////////////////////////// +// +// TLS CBC cipher suites HMAC verification +// +// The TLS cipher suites for block cipher modes (typically CBC) are designed in an unfortunate way. +// The format is: +// Plaintext | MAC | <padding> | <padding_length> +// Which is then encrypted by the block cipher. +// Plaintext is the data being transferred. MAC is the HMAC value over some header data and the plaintext. +// The padding_length is a byte (range 0-255) that specifies the length of the padding. +// The padding consists of padding_length bytes (up to 255). Each byte is equal to padding_length. +// The padding_length is chosen so that length of the whole structure is a multiple of the block cipher block +// size, so that it can be encrypted with CBC. +// +// The problem is that when decrypting this, the natural code will take actions that depend on the padding_length +// byte before it has been authenticated, and those actions might reveal information about the padding_length byte. This +// in turn can be used in an attack that lets the attacker decrypt data. +// We are particularly concerned with software side channels, where another thread infers information about what the +// active thread is doing through cache state and other shared CPU state. +// +// To address this issue once and for all, we created an implementation of the HMAC verification with the following +// properties: +// - It verifies the HMAC in the data structure above. +// - This is done in a side-channel safe manner, not revealing anything except whether the structure is valid or not. +// This means that the HMAC computation over the plaintext is constant-time and constant-memory-access pattern +// irrespective of the padding_length; thus this is a fixed-time implementation for variable-sized inputs. +// Similarly, the MAC value has to be extracted from a variable location in the input using a fixed memory access +// pattern. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsCbcHmacVerify( + _In_ PCSYMCRYPT_MAC pMacAlgorithm, + _In_ PVOID pExpandedKey, + _Inout_ PVOID pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData); +// Verify a TLS CBC cipher suite MAC value +// - pMacAlgorithm: one of SymCryptHmacSha1Algorithm, SymCryptHmacSha256Algorithm, or SymCryptHmacSha384Algorithm. +// Other MAC algorithms are not supported. +// - pState points to a SYMCRYPT_HMAC_SHAXXX_STATE. It is allowed to process data into the state before this call, +// but the total # bytes processed must be < 2^16. +// - pbData points to a buffer containing the concatenation of plaintext, MAC, padding, and padding_length. +// - cbData is the size of the buffer. +// Note: callers should pass the entire (plaintext | MAC | padding | padding_length) in a single call to get +// the full side-channel protection. +// This function returns success if the HMAC verification is successful. +// It returns an error if the padding or HMAC verification fails. +// After the call pState is wiped of any sensitive data, just like the SymCryptHmacXxxResult function. +// Callers have to check the padding_length byte pbData[cbData-1] to determine the size of the plaintext. +// + + + +/* + +Yes, despite its name, SymCrypt supports asymmetric cryptographic algorithms. +The asymmetric implementations have the following primary design goals: + - Implement asymmetric cryptographic algorithms like RSA, DSA, DH, ECDSA, ECDH, etc. + - Protect against all software-based side-channel attacks + - Protect against those hardware-based side-channel attacks that can be practically protected against in software. + - High performance, dynamically using CPU features that are available on the current CPU stepping. + - Support small code and small memory environments. + - Support environments that need to control memory allocations. + +The primary use-case is for SymCrypt to be the crypto library for MS products.
This includes high-performance +scenarios such as TLS server termination, and low-footprint uses such as Bootmgr. +SymCrypt supports applications such as firmware updates for embedded CPUs where code and memory +footprint are of overriding importance. + +Side channel attacks: +Defence against side channel attacks plays an important part in the design and implementation of +SymCrypt. Side channel attacks are a class of attacks on cryptographic systems where the attacker +gets some information about a cryptographic computation in addition to the inputs and outputs. +For example, any of the following information could be retrieved by the attacker: +- The time it takes to perform a computation (either exactly or approximately) +- The power usage over time of the CPU. +- The noise made by the computer's power supply (a function of the CPU power consumption) +- Which cache lines are evicted from the attacker's thread A by a computation in thread B. +These may sound like esoteric attacks, but all of them have been used in practical demonstrations +to attack cryptographic systems. + +SymCrypt uses the following API rules to protect against side-channel attacks: +- Information is divided into two classes: public information and private information. +- Public information is allowed to leak through side channels, and the library makes no attempt to hide + public information. +- Private information is protected against side-channel attacks to the best ability of the library. +Unless otherwise documented, all information is treated as private. +Functions may document that a particular value is "published". This means that the function may use +the value in a way that is not side-channel safe, so any security analysis that considers +side-channel attacks must assume that the published value is public and known by the attacker. + +The following information is always assumed to be public, and thus known to any side-channel attacker: +- Which SymCrypt function is being called.
+- The location of any of the buffers passed as arguments. +- The size parameter of any buffer passed as an argument. +- Any details that cause a function to return an error. +Thus, it is important that callers who wish to be side-channel safe ensure that their buffer locations and sizes +do not reveal any information, and that they do not make any calls that result in an error, unless there is no +need for secrecy when an error occurs. + +Because pointer values are all public (the memory address cannot be hidden on modern CPUs if the buffer is accessed) +side-channel safe code ends up using masked operations, such as masked-copy where the copy is done or not done +depending on a mask parameter to the function. +SymCrypt exposes a set of masked functions that applications can use for their own side-channel safe operations. + +The following coding rules are used to protect private information: +- The sequence of instructions executed is independent of private information. +- The sequence of memory operations (read/write) and memory addresses accessed is independent of private information. +- Private information is not used in instructions whose timing may depend on the data being processed. +As far as we know these rules stop all software-based side-channel attacks, and many hardware-based ones. + +One remaining line of attack is to feed the algorithm with values that are special. For example, an RSA +decryption may receive a value that contains many zeroes modulo one prime. If the power consumption of the +multiply instruction reveals whether one of the multiplicands is zero, then the attacker might learn +useful information. Note that this is a pure hardware attack, it is not applicable to software attackers. +Protecting against this style of attack is an area that still needs more research. Where applicable we +document the additional protections that SymCrypt provides. 
+ + +Running with CHKed code: +All binaries that use SymCrypt must build CHKed versions of the binary (linking the CHKed version of SymCrypt) +and perform full test runs on the CHKed version. +Due to the performance and operational requirements, the production-optimized SymCrypt library API cannot +check all buffer sizes or even be fully SAL-annotated. +The necessary size information is simply not available at every call point, and passing +the size information around would add too much overhead. +The CHKed version of the library adds additional code & per-object storage to be able to implement checks that +are broadly equivalent to what SAL would normally check. +SAL checks are part of the SDL requirements and need to be done on all Microsoft products. +Though this requirement cannot strictly speaking be satisfied with the SymCrypt library, running the CHKed +version through full validation is the best equivalent, and therefore should be considered mandatory. + +Please ensure that the validation runs exercise all the border-cases of largest and smallest sizes, as well as +intermediate sizes for the parameters. + +*/ + + +// +// Caller-provided functions +// +// Some of the large-integer and asymmetric algorithm functions use callbacks. +// The callback functions do not have to be functional for binaries that only use the symmetric algorithm +// implementations. +// Use of callbacks is documented in each function that uses them. +// + +PVOID +SYMCRYPT_CALL +SymCryptCallbackAlloc( SIZE_T nBytes ); +// +// Allocate a buffer of nBytes; returns NULL on failure. +// Returned pointer must be aligned to a multiple of SYMCRYPT_ASYM_ALIGN_VALUE. +// + +VOID +SYMCRYPT_CALL +SymCryptCallbackFree( PVOID pMem ); +// +// Called by SymCrypt to free a buffer previously allocated by SymCryptCallbackAlloc(). +// Note that callers should never call these functions directly. Buffers that were returned +// from the SymCrypt API are freed with the SymCryptXxxFree functions, not this function.
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SYMCRYPT_WEAK_SYMBOL +SymCryptCallbackRandom( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer ); +// +// Fill the buffer with uniformly distributed random bytes from a cryptographically strong RNG source. +// + +PVOID +SYMCRYPT_CALL +SymCryptCallbackAllocateMutexFastInproc(void); +// +// Allocate and initialize a mutex object; returns NULL on failure. +// +// Fast indicates that users of the mutex will only hold it for a short period of time, so it +// is not expected that threads should need to sleep before acquiring the mutex. (i.e. can be +// implemented by a spinlock in kernel mode). +// Inproc indicates the mutex is only used for synchronization between threads in a single process. +// +// Users of the library in contexts where mutexes are not available can set this callback to always +// return NULL, and attempts to use APIs requiring it will fail at runtime. +// + +VOID +SYMCRYPT_CALL +SymCryptCallbackFreeMutexFastInproc( _Inout_ PVOID pMutex ); +// +// Free a mutex object previously created by SymCryptCallbackAllocateMutexFastInproc +// + +VOID +SYMCRYPT_CALL +SymCryptCallbackAcquireMutexFastInproc( _Inout_ PVOID pMutex ); +// +// Take exclusive ownership of a mutex object allocated by SymCryptCallbackAllocateMutexFastInproc. +// +// This call must also ensure memory ordering such that stores before the previous call to +// SymCryptCallbackReleaseMutexFastInproc with this mutex are observable by loads after this call. +// + +VOID +SYMCRYPT_CALL +SymCryptCallbackReleaseMutexFastInproc( _Inout_ PVOID pMutex ); +// +// Relinquish ownership of a mutex object allocated by SymCryptCallbackAllocateMutexFastInproc and +// acquired by SymCryptCallbackAcquireMutexFastInproc. 
+// + +//============================================================================================== +// Object types for high-level API +// +// SYMCRYPT_RSAKEY A key that stores the information for the RSA algorithms (encryption and signing). +// It always contains the RSA parameters / public key, and may or may not contain +// the associated private key. +// SYMCRYPT_DLGROUP A discrete log group to be used for the DSA and DH algorithms. It contains the +// group parameters (P,[Q],G) (The prime Q is optional). +// SYMCRYPT_DLKEY A "discrete log" key that stores the information for the DSA and DH algorithms. It +// always contains a public key, and may or may not contain the associated private key. +// SYMCRYPT_ECURVE An elliptic curve over a prime field. Contains field prime, curve parameters, +// and distinguished point (generator). +// SYMCRYPT_ECKEY An elliptic curve key for the ECDH and ECDSA algorithms. It always contains a +// public key, and may or may not contain the associated private key. +// +// See symcrypt_internal.h for structure definitions. +// + +//============================================================================================== +// Supported formats and parameters +// + +typedef enum _SYMCRYPT_NUMBER_FORMAT { + SYMCRYPT_NUMBER_FORMAT_LSB_FIRST = 1, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST = 2, +} SYMCRYPT_NUMBER_FORMAT; +// +// SYMCRYPT_NUMBER_FORMAT is used to specify the number format for import and export +// of BYTE arrays. We support the following two number formats: +// Let p[0], ..., p[n-1] be an array containing n bytes: +// LSB_FIRST: +// Value = \sum_{i=0}^{n-1} p[i] * 2^{8*i} +// = p[0] + 2^8 * p[1] + 2^{16} * p[2] + ... +// +// MSB_FIRST: +// Value = \sum_{i=0}^{n-1} p[n-1-i] * 2^{8*i} +// = p[n-1] + 2^8 * p[n-2] + 2^{16} * p[n-3] + ... 
+// + +typedef struct _SYMCRYPT_RSA_PARAMS { + UINT32 version; // Version of the parameters structure + UINT32 nBitsOfModulus; // Number of bits in the modulus + UINT32 nPrimes; // Number of primes, 0 if object is only for public key + UINT32 nPubExp; // Number of public exponents (typically 1) +} SYMCRYPT_RSA_PARAMS, *PSYMCRYPT_RSA_PARAMS; +typedef const SYMCRYPT_RSA_PARAMS * PCSYMCRYPT_RSA_PARAMS; +// +// SYMCRYPT_RSA_PARAMS is used to specify all the parameters needed for creation of an +// RSA key object. The above is version 1 of the parameters. +// Currently, we only support nPubExp = 1 and nPrimes = 0 or 2. +// Note: nPrimes > 2 and nPubExp > 1 allow faster and more flexible +// RSA functionality. Though currently not supported, these parameters make it easy to add +// support in the future. +// + +// Notation for elliptic curve parameters and functions +// ==================================================== + +// E The elliptic curve group. This is typically represented as the set of 2D points (with +// coordinates from a finite field) that satisfy a specific curve equation. +// An example equation is y^2 = x^3 + Ax + B for A,B. The set E also +// contains a special "zero" point denoted by O. +// |E| The total number of points on the elliptic curve group E. +// G A special point in E which generates a (prime) order subgroup. +// GOrd The (prime) order of the generator point G. Therefore, GOrd * G = O. +// h The cofactor of the curve. It is defined as h = |E| / GOrd. Typical +// cofactors are 4 (NUMS curves), and 8 (curve 25519). + +// Definitions +// =========== + +// A "proper public key" (PPK) on the curve E is defined to be an arbitrary nonzero point of the +// subgroup generated by the point G. + +// A "proper secret key" (PSK) is the logarithm of a "proper public key" with +// respect to G. Therefore, if Q is the PPK, then the corresponding PSK is the unique +// integer s with 0 < s < GOrd such that s*G = Q. 
+ +// If the cofactor of the curve is equal to 1, then the entire group E is generated by +// the point G and all nonzero points in E are "proper public keys". + +// Otherwise, an arbitrary point on the curve might or might not belong to the subgroup +// generated by G. Furthermore, in this case, an arbitrary point P may have order equal +// to the cofactor (or smaller), i.e. h*P=O, or an order larger than GOrd. + +// To securely handle the cases where "non-proper" public keys are imported from possibly malicious +// sources, the creators of curve parameters impose several restrictions on the secret keys +// and the algorithms used. For example, the scalar multiplication algorithm for NUMS curves +// always pre-multiplies a point by the cofactor; in order to zero-out any possible +// components of lower order ("low-order clearing"). Curve 25519 imposes this by asserting +// that all secret keys have the 3 lowest bits set to 0, which is equivalent to multiplying +// by h=8. + +typedef enum _SYMCRYPT_ECURVE_GEN_ALG_ID { + SYMCRYPT_ECURVE_GEN_ALG_ID_NULL = 0, +} SYMCRYPT_ECURVE_GEN_ALG_ID; +// +// SYMCRYPT_ECURVE_GEN_ALG_ID is used to specify (if available) the algorithm that +// generates the curve parameters from the provided seed. +// + + +typedef struct _SYMCRYPT_ECURVE_PARAMS_V2_EXTENSION { + UINT32 PrivateKeyDefaultFormat; + UINT32 HighBitRestrictionNumOfBits; + UINT32 HighBitRestrictionPosition; + UINT32 HighBitRestrictionValue; +} SYMCRYPT_ECURVE_PARAMS_V2_EXTENSION, *PSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION; +typedef const SYMCRYPT_ECURVE_PARAMS_V2_EXTENSION * PCSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION; +// +// SYMCRYPT_ECURVE_PARAMS_V2_EXTENSION is used to specify restrictions and default formats +// for known curves. The possible formats and restriction are explained below. +// + +// Secret key formats +// ================== +// The possible secret key formats in SymCrypt are shown below. For all formats, s denotes +// a "proper secret key" defined as above. I.e. 
0 < s < GOrd. +// +// 1. "Canonical": s +// 2. "DivH": s/h mod GOrd +// 3. "DivHTimesH": h*(s/h mod GOrd) +// 4. "TimesH": h*s <-- This format is currently unsupported +// +// Remarks: +// - The above formats apply **only to external formats**: When somebody is +// importing a secret key (from test vectors, for example) or exporting a key. +// The internal format of the secret keys might be one of them or something totally +// different; the internal format is not visible to the caller. +// - Formats 3 and 4 have bigger storage requirements compared to 1 and 2, as +// the key can be up to |E|. +// - When h=1 all formats are identical. This is the case for NIST curves. +// - The NUMS curves use the "DivH" secret key format in the test vectors and the +// multiplication algorithm implicitly multiplies by h. +// - Curve 25519 uses the "DivHTimesH" secret key format in the test vectors. +typedef enum _SYMCRYPT_ECKEY_PRIVATE_FORMAT { + SYMCRYPT_ECKEY_PRIVATE_FORMAT_NULL = 0, + SYMCRYPT_ECKEY_PRIVATE_FORMAT_CANONICAL = 1, + SYMCRYPT_ECKEY_PRIVATE_FORMAT_DIVH = 2, + SYMCRYPT_ECKEY_PRIVATE_FORMAT_DIVH_TIMESH = 3, +} SYMCRYPT_ECKEY_PRIVATE_FORMAT; + +// High bit restrictions +// ===================== +// A high bit restriction is a requirement for some of the high bits of the secret keys +// (usually the most significant bits of the curve). +// Currently only curve 25519 imposes such a restriction: That the bits 255 and 254 of the +// secret key in the "DivHTimesH" format are 0 and 1, respectively. +// +// The high bit restrictions specification takes the following form: +// - Number of bits that are specified +// - Bit position of the lowest bit to be specified (starting from 0 for the LSB) +// - The bit values +// The bits that are specified refer to the relevant secret key format. +// For Canonical and DivH formats the total number of bits is the # bits of GOrd-1. +// For DivHTimesH and TimesH formats the total number of bits is the # bits of |E|-1. 
+// +// Note: as GOrd must be prime, #bits(GOrd) == #bits(GOrd-1). The same is true +// for |E|=h*GOrd as it cannot be a power of 2. +// +// The HighBitRestrictionNumOfBits field is a value between 0 and 32 (inclusive) +// and specifies how many bits of the HighBitRestrictionValue are used (starting +// from the least significant bit of the value). The bits that are restricted are +// the bits [HighBitRestrictionPosition+HighBitRestrictionNumOfBits-1, ..., HighBitRestrictionPosition] +// +// For example, let's assume it is required that the bits [104, 103, ..., 100] +// of all private keys of a curve are always 11011. +// Then the parameters should be set to +// HighBitRestrictionNumOfBits = 5 +// HighBitRestrictionPosition = 100 +// HighBitRestrictionValue = 0x1B +// + + +typedef struct _SYMCRYPT_ECURVE_PARAMS { + UINT32 version; // Version of the parameters structure (see comment below) + SYMCRYPT_ECURVE_TYPE type; // Type of the curve + SYMCRYPT_ECURVE_GEN_ALG_ID algId; // Algorithm ID for generation of parameters from seed + UINT32 cbFieldLength; // Length of the field elements in bytes + UINT32 cbSubgroupOrder; // Length of the subgroup order in bytes + UINT32 cbCofactor; // Length of the cofactor in bytes + UINT32 cbSeed; // Length of the seed + // This struct is followed in memory by: + //P[cbFieldLength] Prime of the base field + //A[cbFieldLength] Coefficient A of all three types of curves + //B[cbFieldLength] Coefficient B of Weierstrass and Montgomery curves and D for Twisted Edwards curves + //Gx[cbFieldLength] X-coordinate of the distinguished point (assuming SYMCRYPT_ECPOINT_FORMAT_XY) + //Gy[cbFieldLength] Y-coordinate of the distinguished point (assuming SYMCRYPT_ECPOINT_FORMAT_XY) + //n[cbSubgroupOrder] Order of the subgroup generated by the distinguished point + //h[cbCofactor] Cofactor of the distinguished point + //S[cbSeed] Seed of the curve + + //ParamsV2Extension[sizeof(SYMCRYPT_ECURVE_PARAMS_V2_EXTENSION)]; // Only on version 2 of the parameters
+} SYMCRYPT_ECURVE_PARAMS, *PSYMCRYPT_ECURVE_PARAMS; +typedef const SYMCRYPT_ECURVE_PARAMS * PCSYMCRYPT_ECURVE_PARAMS; +// +// SYMCRYPT_ECURVE_PARAMS is used to specify all the parameters needed for the curve generation. The above +// are versions 1 and 2 of the curve parameters. +// + +typedef enum _SYMCRYPT_ECPOINT_FORMAT { + SYMCRYPT_ECPOINT_FORMAT_X = 1, // One value, encoding the X coordinate only of a point + SYMCRYPT_ECPOINT_FORMAT_XY = 2, // Two equally-sized values, the first one encoding X and the second one encoding Y +} SYMCRYPT_ECPOINT_FORMAT; +// +// SYMCRYPT_ECPOINT_FORMAT is used to support different elliptic curve point formats, including possible point compression. +// + +//======================================================================== +//======================================================================== +// Main schema for object creation, deletion, and management. +// +// Object management is the same for most object types. For an object type XXX we have +// the following functions: +// +// PSYMCRYPT_XXX +// SYMCRYPT_CALL +// SymCryptXxxAllocate( <size parameters> ) +// Allocates an object of type XXX according to the specified size parameters. +// If the allocation fails, NULL is returned. +// If the allocation succeeds, an XXX pointer is returned, and the caller is responsible +// for freeing the result using SymCryptXxxFree(). +// The value of the new object is undefined. +// All the parameters to this function are published. (Object sizes cannot be private information.) +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxFree( _Inout_ PSYMCRYPT_XXX p ) +// Free an XXX object allocated with SymCryptXxxAllocate(). +// Any storage location in the object that might have contained private information is wiped. +// +// UINT32 +// SYMCRYPT_CALL +// SymCryptSizeofXxxFromYyy( <size parameters> ); +// Memory size that is sufficient to store an XXX object with size defined by the <size parameters>.
+// The Yyy specifies the form of the size parameters, for example Ecurve. +// This is a runtime function as the size of an object is a run-time decision dependent on the CPU stepping. +// The result is always a multiple of the alignment requirements of this object type, so arrays can be built +// using this element size. +// +// SYMCRYPT_SIZEOF_XXX_FROM_YYY( <size parameters> ) +// This is a compile-time macro that computes a value not less than the SymCryptSizeofXxxFromYyy function, and +// is suitable to statically compute the size of a memory buffer for an object. +// (Not defined for all types.) +// +// PSYMCRYPT_XXX +// SYMCRYPT_CALL +// SymCryptXxxCreate( +// _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, +// SIZE_T cbBuffer, +// <size parameters> ); +// Create an XXX object from the provided (pbBuffer, cbBuffer) space. +// This function performs the necessary initializations of the object, but does not assign or set a value. +// The object will be able to store values up to size determined by the <size parameters>. +// Requirement: +// - pbBuffer is aligned to SYMCRYPT_ASYM_ALIGN_VALUE. Note that this can be a stricter requirement than +// SYMCRYPT_ALIGNED, and memory allocation functions might not return pointers that are suitably +// aligned. For some object types and some CPUs, the alignment requirements might be less strict. +// The main purpose of this relaxation is to always allow objects that are spaced +// SymCryptSizeofXxxFromYyy apart. The common usage is to create an array of objects. The array +// starts at a SYMCRYPT_ASYM_ALIGNed location, with each element SymCryptSizeofXxxFromYyy(..) bytes long. +// - cbBuffer >= SymCryptSizeofXxxFromYyy( <size parameters> ) +// - (pbBuffer,cbBuffer) memory must be exclusively used by this object. +// The last requirement ensures that all objects are non-overlapping (except for API functions +// that explicitly create overlapping objects). +// All parameters are published. 
+// It is always safe to choose +// cbBuffer = SymCryptSizeofXxxFromYyy( <size parameters> ) +// The returned object pointer is simply a cast of the pbBuffer pointer. +// Callers that manage arrays of objects can reconstruct the PSYMCRYPT_XXX by casting the buffer pointer +// to the right type. +// An object that is created with this function should be wiped, even if it doesn't contain private data. +// The SymCryptXxxWipe() function also frees any associated data that the library may maintain. +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxWipe( _Out_ PSYMCRYPT_XXX Dst ) +// All private information in the Dst object is wiped, and any associated data is freed. +// Unless otherwise specified, the Dst object is left in an undefined state. +// A SymCryptXxxAllocate-d object does not have to be wiped before it is freed +// because the SymCryptXxxFree function will perform the wipe. +// However, SymCryptXxxCreate-d objects should always be wiped even if they don't contain +// secret data, as the wipe also frees any associated data the library may maintain. +// +// VOID +// SYMCRYPT_CALL +// SymCryptXxxCopy( +// _In_ PCSYMCRYPT_XXX pxSrc, +// _Out_ PSYMCRYPT_XXX pxDst ); +// Dst = Src. +// Requirement: The <size parameters> of both objects should be the same. +// Src must be in a defined state, it is not valid to copy an undefined object. +// Src and Dst may be the same object (though that is a no-op). +// + +//======================================================================== +// RSAKEY objects' API +// + +#define SYMCRYPT_SIZEOF_RSAKEY_FROM_PARAMS( modBits, nPrimes, nPubExps ) \ + SYMCRYPT_INTERNAL_SIZEOF_RSAKEY_FROM_PARAMS( modBits, nPrimes, nPubExps ) +// Return a buffer size large enough to create an RSA key with the specified +// modulus size, # primes, # public exponents, and upper bound for the bitsize of each public exponent.
+// If the object will only contain a public key, nPrimes can be set to 0 + +PSYMCRYPT_RSAKEY +SYMCRYPT_CALL +SymCryptRsakeyAllocate( + _In_ PCSYMCRYPT_RSA_PARAMS pParams, + _In_ UINT32 flags ); +// +// Allocate and create a new RSAKEY object sized according to the parameters. +// If the SYMCRYPT_RSAKEY object will only be used for a public key, the +// SYMCRYPT_RSA_PARAMS structure may set nPrimes = 0. Use of +// SymCryptRsakeySetValueFromPrivateExponent requires nPrimes = 2. +// +// This call does not initialize the key. It should be +// followed by a call to SymCryptRsakeyGenerate or +// SymCryptRsakeySetValue*. +// +// No flags are specified for this function. +// + +VOID +SYMCRYPT_CALL +SymCryptRsakeyFree( _Out_ PSYMCRYPT_RSAKEY pkObj ); + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofRsakeyFromParams( _In_ PCSYMCRYPT_RSA_PARAMS pParams ); +// If the to-be-allocated SYMCRYPT_RSAKEY object will only be used for a public key, the +// SYMCRYPT_RSA_PARAMS structure may set nPrimes = 0. + +PSYMCRYPT_RSAKEY +SYMCRYPT_CALL +SymCryptRsakeyCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _In_ PCSYMCRYPT_RSA_PARAMS pParams ); +// +// Create an RSAKEY object from a buffer, but does not initialize it. +// If the SYMCRYPT_RSAKEY object will only be used for a public key, the +// SYMCRYPT_RSA_PARAMS structure may set nPrimes = 0. Use of +// SymCryptRsakeySetValueFromPrivateExponent requires nPrimes = 2. +// +// This call does not initialize the key. It should be +// followed by a call to SymCryptRsakeyGenerate or +// SymCryptRsakeySetValue*. +// + +VOID +SYMCRYPT_CALL +SymCryptRsakeyWipe( _Out_ PSYMCRYPT_RSAKEY pkDst ); + +// +//VOID +//SYMCRYPT_CALL +//SymCryptRsakeyCopy( +// _In_ PCSYMCRYPT_RSAKEY pkSrc, +// _Out_ PSYMCRYPT_RSAKEY pkDst ); +// +// This function is currently not available. 
+// + +//======================================================================== +// DLGROUP objects' API +// + +PSYMCRYPT_DLGROUP +SYMCRYPT_CALL +SymCryptDlgroupAllocate( UINT32 nBitsOfP, UINT32 nBitsOfQ ); +// +// Allocate a Discrete Logarithm group object suitable for the given sizes. +// +// nBitsOfP: Maximum number of bits of the field prime P. Specifying a value larger +// than the actual size is allowed, but inefficient. +// nBitsOfQ: Maximum number of bits of the group order Q. Specify the size of Q, +// or 0 if the size of Q is not (yet) known. +// +// This call does not initialize the DLGROUP. It should be followed +// by a call to SymCryptDlgroupGenerate or SymCryptDlgroupSetValue. +// +// nBitsOfQ is allowed to be equal to 0 and signifies that the size of Q +// is unknown or Q does not exist. This may be used when creating a DLGROUP +// for the DH algorithm which does not use a prime Q. +// +// Setting nBitsOfQ to something bigger than 0 signifies that the size of +// the prime Q is known and if a future caller tries to import a bigger Q then +// the SymCryptDlgroupSetValue call will fail. +// +// Technically nBitsOfQ should always be strictly less than nBitsOfP, as Q divides +// P-1. For simplicity, it is allowed that callers specify nBitsOfQ equal to nBitsOfP +// in this call, but SymCrypt will treat this as setting nBitsOfQ to (nBitsOfP-1). +// +// Setting nBitsOfQ to 0 might result in a bigger size of the DLGROUP object +// compared to setting it to a specific size (see SymCryptSizeofDlgroupFromBitsizes). +// +// Requirements: +// - nBitsOfP >= nBitsOfQ +// + +VOID +SYMCRYPT_CALL +SymCryptDlgroupFree( _Out_ PSYMCRYPT_DLGROUP pgObj ); + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofDlgroupFromBitsizes( UINT32 nBitsOfP, UINT32 nBitsOfQ ); +// +// This call returns the memory size that is sufficient to store a +// DLGROUP object with primes P,Q of size nBitsOfP and nBitsOfQ, +// respectively (L,N parameters in FIPS 186-3 specs). 
+// +// Requirements: +// - nBitsOfP >= nBitsOfQ +// +// Remarks: +// - The value in nBitsOfQ is allowed to be equal to 0 +// (see SymCryptDlgroupAllocate). +// +// - When nBitsOfQ!=0 this is a monotonic function w.r.t. a partial order on N^2. +// I.e. for all fixed (nBitsOfP_0,nBitsOfQ_0) and (nBitsOfP_1,nBitsOfQ_1) with +// nBitsOfQ_0>0 and nBitsOfQ_1>0, +// +// (nBitsOfP_0<=nBitsOfP_1 AND nBitsOfQ_0<=nBitsOfQ_1) implies that +// F(nBitsOfP_0,nBitsOfQ_0) <= F(nBitsOfP_1,nBitsOfQ_1) +// where F is the function SymCryptSizeofDlgroupFromBitsizes. +// +// - F(nBitsOfP, 0)=F(nBitsOfP, nBitsOfP-1). Thus when nBitsOfQ==0 the +// function takes the maximum value for a fixed nBitsOfP. +// + +PSYMCRYPT_DLGROUP +SYMCRYPT_CALL +SymCryptDlgroupCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nBitsOfP, + UINT32 nBitsOfQ ); +// +// Creates a DL group object, but does not initialize it. It must be followed +// by a call to SymCryptDlgroupGenerate or SymCryptDlgroupSetValue. +// +// - pbBuffer,cbBuffer: memory buffer to create the object out of. The required size +// can be computed with SymCryptSizeofDlgroupFromBitsizes(). +// - nBitsOfP: number of bits of the field prime P. +// - nBitsOfQ: number of bits of the group order Q, or 0 if the size of Q is not (yet) known. +// + +VOID +SYMCRYPT_CALL +SymCryptDlgroupWipe( _Out_ PSYMCRYPT_DLGROUP pgDst ); + +VOID +SYMCRYPT_CALL +SymCryptDlgroupCopy( + _In_ PCSYMCRYPT_DLGROUP pgSrc, + _Out_ PSYMCRYPT_DLGROUP pgDst ); + +//======================================================================== +// DLKEY objects' API +// + +PSYMCRYPT_DLKEY +SYMCRYPT_CALL +SymCryptDlkeyAllocate( _In_ PCSYMCRYPT_DLGROUP pDlgroup ); +// +// This call does not initialize the key. It should be +// followed by a call to SymCryptDlkeyGenerate or +// SymCryptDlkeySetValue. 
+// + +VOID +SYMCRYPT_CALL +SymCryptDlkeyFree( _Out_ PSYMCRYPT_DLKEY pkObj ); + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofDlkeyFromDlgroup( _In_ PCSYMCRYPT_DLGROUP pDlgroup ); + +PSYMCRYPT_DLKEY +SYMCRYPT_CALL +SymCryptDlkeyCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _In_ PCSYMCRYPT_DLGROUP pDlgroup ); + +VOID +SYMCRYPT_CALL +SymCryptDlkeyWipe( _Out_ PSYMCRYPT_DLKEY pkDst ); + +VOID +SYMCRYPT_CALL +SymCryptDlkeyCopy( + _In_ PCSYMCRYPT_DLKEY pkSrc, + _Out_ PSYMCRYPT_DLKEY pkDst ); + +//======================================================================== +// ECURVE objects' API is slightly different than the above API schema because of the close +// relation to multiple parameters, the fact that they contain public information, +// and that they are persisted by the callers. +// Thus, the Allocate function takes in all the curve parameters and there are no Create, +// Wipe, or Copy functions. +// + +PSYMCRYPT_ECURVE +SYMCRYPT_CALL +SymCryptEcurveAllocate( + _In_ PCSYMCRYPT_ECURVE_PARAMS pParams, + _In_ UINT32 flags ); +// +// Allocate memory and create an ECURVE object which is defined +// by the parameters in pParams. +// +// - pParams: parameters that define the curve +// - flags: Not used, must be zero. +// +// Future versions might use the flags to enable different features/tradeoffs. +// There are a number of interesting memory/speed/pre-computation cost trades that can be made. +// For example, pre-computing multiples of the distinguished point, or (parallel?) pre-computation +// of (r, rG) pairs for random r values. +// +// This function applies limited validation of the pParams. The validation is intended to eliminate +// the threat of denial-of-service when hostile parameters are presented. It does not ensure that +// the parameters make sense, define a proper curve, or that any elliptic-curve operations made on +// the curve built from these parameters will fail, succeed or provide any security. 
+// The only guarantee provided for invalid parameters is that all operations on this curve will +// not crash and will return in some reasonable amount of time. +// +// Returns NULL if out of memory or the parameters are deemed invalid. +// If the return value is not NULL, the object must later be freed with SymCryptEcurveFree(). +// + +VOID +SYMCRYPT_CALL +SymCryptEcurveFree( _Out_ PSYMCRYPT_ECURVE pCurve ); + +//======================================================================== +// ECKEY objects' API is slightly different than the above API schema in the sense that they +// take as input an ECURVE object pointer instead of the number of digits. +// + +PSYMCRYPT_ECKEY +SYMCRYPT_CALL +SymCryptEckeyAllocate( _In_ PCSYMCRYPT_ECURVE pCurve ); + +VOID +SYMCRYPT_CALL +SymCryptEckeyFree( _Out_ PSYMCRYPT_ECKEY pkObj ); + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofEckeyFromCurve( _In_ PCSYMCRYPT_ECURVE pCurve ); + +PSYMCRYPT_ECKEY +SYMCRYPT_CALL +SymCryptEckeyCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + PCSYMCRYPT_ECURVE pCurve ); + +VOID +SYMCRYPT_CALL +SymCryptEckeyWipe( _Out_ PSYMCRYPT_ECKEY pkDst ); + +VOID +SymCryptEckeyCopy( + _In_ PCSYMCRYPT_ECKEY pkSrc, + _Out_ PSYMCRYPT_ECKEY pkDst ); + + +//===================================================== +// Flags for asymmetric key generation and import + +// These flags are introduced primarily for FIPS purposes. For FIPS 140-3 rather than expose to the +// caller the specifics of what tests will be run with various algorithms, we are sanitizing flags +// provided on asymmetric key generation and import to enable the caller to indicate their intent, +// and for SymCrypt to perform the required testing. +// Below we define the flags that can be passed and when a caller should set them. +// The specifics of what tests will be run are likely to change over time, as FIPS requirements and +// our understanding of how best to implement them, change over time. 
Callers should not rely on +// specific behavior. + + +// Validation required by FIPS is enabled by default. This flag enables a caller to opt out of this +// validation. +#define SYMCRYPT_FLAG_KEY_NO_FIPS (0x100) + +// When opting out of FIPS, SymCrypt may still perform some sanity checks on key import +// In very performance sensitive situations where a caller strongly trusts the values it is passing +// to SymCrypt and does not care about FIPS (or can statically prove properties about the imported +// keys), a caller may specify SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION in addition to +// SYMCRYPT_FLAG_KEY_NO_FIPS to skip costly checks +#define SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION (0x200) + +// Callers must specify what algorithm(s) a given asymmetric key will be used for. +// This information will be tracked by SymCrypt, and attempting to use the key in an algorithm it +// was not generated or imported for will result in failure. +// If no algorithm is specified then the key generation or import function will fail. +#define SYMCRYPT_FLAG_DLKEY_DSA (0x1000) +#define SYMCRYPT_FLAG_DLKEY_DH (0x2000) + +#define SYMCRYPT_FLAG_ECKEY_ECDSA (0x1000) +#define SYMCRYPT_FLAG_ECKEY_ECDH (0x2000) + +#define SYMCRYPT_FLAG_RSAKEY_SIGN (0x1000) +#define SYMCRYPT_FLAG_RSAKEY_ENCRYPT (0x2000) + +//===================================================== +// RSA key operations + +BOOLEAN +SYMCRYPT_CALL +SymCryptRsakeyHasPrivateKey( _In_ PCSYMCRYPT_RSAKEY pkRsakey ); +// +// Returns TRUE if the pkRsakey object has private key information. +// + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeySizeofModulus( _In_ PCSYMCRYPT_RSAKEY pkRsakey ); +// +// Returns the (tight) size in bytes of a byte array big enough to store +// the modulus of the key. 
+// + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeyModulusBits( _In_ PCSYMCRYPT_RSAKEY pkRsakey ); +// +// Return the number of bits in the RSA modulus +// + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeySizeofPublicExponent( + _In_ PCSYMCRYPT_RSAKEY pRsakey, + UINT32 index ); +// +// Returns the (tight) size in bytes of a byte array big enough to store +// the public exponent. The index specifies the index +// of the public exponent, starting with 0. +// +// Remarks: +// - Currently, only one public exponent is supported, i.e. the only +// valid index is 0. +// + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeySizeofPrime( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + UINT32 index ); +// +// Returns the (tight) size in bytes of a byte array big enough to store +// the selected prime of the key. The index specifies the index of the +// prime, starting at 0. +// +// Remarks: +// - Currently, only two-prime RSA is supported, i.e. the only +// valid indexes are 0 and 1. +// + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeyGetNumberOfPublicExponents( _In_ PCSYMCRYPT_RSAKEY pkRsakey ); +// +// Returns the number of public exponents stored in the key. +// + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeyGetNumberOfPrimes( _In_ PCSYMCRYPT_RSAKEY pkRsakey ); +// +// Returns the number of primes stored in the key. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyGenerate( + _Inout_ PSYMCRYPT_RSAKEY pkRsakey, + _In_reads_opt_( nPubExp ) PCUINT64 pu64PubExp, + UINT32 nPubExp, + _In_ UINT32 flags ); +// +// Generate a new random RSA key using the information from the +// parameters passed to SymCryptRsakeyAllocate/SymCryptRsakeyCreate. +// pu64PubExp is the array of nPubExp public exponent values, specifying +// the public exponents for the key. +// nPubExp must match the # public exponents in the parameters. +// If pu64PubExp == NULL, nPubExp == 0, and the key requires only one +// public exponent, then the default exponent 2^16 + 1 is used.
+// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// - At least one of the flags indicating what the Rsakey is to be used for must be specified: +// SYMCRYPT_FLAG_RSAKEY_SIGN +// SYMCRYPT_FLAG_RSAKEY_ENCRYPT + +// Described in more detail in the "Flags for asymmetric key generation and import" section above +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeySetValue( + _In_reads_bytes_( cbModulus ) PCBYTE pbModulus, + SIZE_T cbModulus, + _In_reads_( nPubExp ) PCUINT64 pu64PubExp, + UINT32 nPubExp, + _In_reads_opt_( nPrimes ) PCBYTE * ppPrimes, + _In_reads_opt_( nPrimes ) SIZE_T * pcbPrimes, + UINT32 nPrimes, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_RSAKEY pkRsakey ); +// +// Import key material to an RSAKEY object. The arguments are the following: +// - pbModulus is a pointer to a byte buffer of cbModulus bytes. It cannot be NULL. +// - pu64PubExp is a pointer to an array of nPubExp UINT64 exponent values. +// nPubExp must match the RSA parameters used to create the key object. +// - ppPrimes is an array of nPrimes pointers that point to byte buffers storing +// the primes. pcbPrimes is an array of nPrimes sizes such that +// the size of ppPrimes[i] is equal to pcbPrimes[i] for each i in [0, nPrimes-1]. 
+// - numFormat specifies the number format for all inputs +// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// - SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION +// Opt-out of performing almost all validation - must be specified with SYMCRYPT_FLAG_KEY_NO_FIPS +// +// - At least one of the flags indicating what the Rsakey is to be used for must be specified: +// SYMCRYPT_FLAG_RSAKEY_SIGN +// SYMCRYPT_FLAG_RSAKEY_ENCRYPT +// +// Described in more detail in the "Flags for asymmetric key generation and import" section above +// +// Remarks: +// - Modulus and all primes are stored in the same format specified by numFormat. +// - ppPrimes, pcbPrimes, and nPrimes can be NULL, NULL, and 0 respectively, when +// importing a public key. +// - Currently, the only acceptable value of nPubExp is 1. +// - Currently, the only acceptable value of nPrimes is 2 or 0. +// - Elements of ppPrimes must represent prime numbers. +// We allow separate sizes for each prime. This seems redundant because all primes +// are approximately the same size. However, some storage/encoding formats, such as ASN.1, +// strip leading zeroes, or add an additional leading zero depending on the situation. +// Allowing separate sizes avoids the need for the caller to make a copy of the data +// into a possibly slightly larger buffer. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeySetValueFromPrivateExponent( + _In_reads_bytes_( cbModulus ) PCBYTE pbModulus, + SIZE_T cbModulus, + UINT64 u64PubExp, + _In_reads_bytes_( cbPrivateExponent ) PCBYTE pbPrivateExponent, + SIZE_T cbPrivateExponent, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_RSAKEY pkRsakey ); +// +// Import private key to an RSAKEY object using a private exponent. This is not generally +// recommended - where possible it is more efficient to import a private key using primes +// with SymCryptRsakeySetValue.
+// +// The arguments are the following: +// - pbModulus is a pointer to a byte buffer of cbModulus bytes. It cannot be NULL. +// - u64PubExp is a UINT64 public exponent value. +// - pbPrivateExponent is a pointer to a byte buffer of cbPrivateExponent bytes. It +// cannot be NULL. +// - numFormat specifies the number format for all inputs +// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// - SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION +// Opt-out of performing almost all validation - must be specified with SYMCRYPT_FLAG_KEY_NO_FIPS +// +// - At least one of the flags indicating what the Rsakey is to be used for must be specified: +// SYMCRYPT_FLAG_RSAKEY_SIGN +// SYMCRYPT_FLAG_RSAKEY_ENCRYPT +// +// Described in more detail in the "Flags for asymmetric key generation and import" section above +// +// Remarks: +// +// Modulus and Private exponent are stored in the same format specified by numFormat. +// +// Internally this attempts to recover a pair of primes (p1, p2) that factorize Modulus. +// This procedure has the following assumptions: +// Modulus (n) is the product of two prime factors, p1 and p2 +// e*d == 1 modulo LCM(p1-1, p2-1) +// e*d != 1 modulo 2^64 +// If any of these assumptions are not met, then the method may fail. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyGetValue( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _Out_writes_bytes_( cbModulus ) PBYTE pbModulus, + SIZE_T cbModulus, + _Out_writes_opt_( nPubExp ) PUINT64 pu64PubExp, + UINT32 nPubExp, + _Out_writes_opt_( nPrimes ) PBYTE * ppPrimes, + _In_reads_opt_( nPrimes ) SIZE_T * pcbPrimes, + UINT32 nPrimes, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags ); +// +// Export key material from an RSAKEY object. The arguments are the following: +// - pbModulus is a pointer to a byte buffer of cbModulus bytes. +// - pu64PubExp is a pointer to an array of nPubExp elements that receives the public exponent values.
+// nPubExp must match the # public exponents in pkRsakey. +// - ppPrimes is an array of nPrimes pointers that point to byte buffers storing +// the primes. pcbPrimes is an array of nPrimes sizes such that +// the size of ppPrimes[i] is equal to pcbPrimes[i] for each i in [0, nPrimes-1]. +// Remarks: +// - All parameters are stored in the same format specified by numFormat. +// - ppPrimes, pcbPrimes, and nPrimes can be NULL, NULL, and 0 respectively, when +// exporting a public key. +// - Currently, the only acceptable value of nPubExp is 1 or 0. +// - Currently, the only acceptable value of nPrimes is 2 or 0. +// We use separate sizes for each prime. This supports the tight encoding +// used by CNG export blobs, and uses the same format as SymCryptRsakeySetValue +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyGetCrtValue( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _Out_writes_opt_(nCrtExponents) PBYTE * ppCrtExponents, + _In_reads_(nCrtExponents) SIZE_T * pcbCrtExponents, + UINT32 nCrtExponents, + _Out_writes_bytes_opt_(cbCrtCoefficient) PBYTE pbCrtCoefficient, + SIZE_T cbCrtCoefficient, + _Out_writes_bytes_opt_(cbPrivateExponent) PBYTE pbPrivateExponent, + SIZE_T cbPrivateExponent, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags); +// +// Export Crt key material from an RSAKEY object. The arguments are the following: +// ppCrtExponents is an array of nCrtExponents pointers that point to byte buffers +// storing the Crt exponents. That is, d mod p-1, d mod q-1. +// pcbCrtExponents is an array of nCrtExponents sizes such that +// the size of ppCrtExponents[i] is equal to pcbCrtExponents[i] for each i in [0, nCrtExponents-1] +// pbCrtCoefficient is a pointer to a byte buffer of cbCrtCoefficient bytes, that is q^{-1} mod p +// pbPrivateExponent is a pointer to a byte buffer of cbPrivateExponent bytes, that is, d. + +// Remarks: +// - All parameters are stored in the same format specified by numFormat.
+// - ppCrtExponents, pcbCrtExponents, and nCrtExponents can be NULL, NULL, and 0 respectively +// - Currently, the only acceptable value of nCrtExponents is 2 or 0. +// pbCrtCoefficient, pbPrivateExponent can be NULL. + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyExtendKeyUsage( + _Inout_ PSYMCRYPT_RSAKEY pkRsakey, + UINT32 flags ); +// +// Enable an existing key which has been generated or imported to be used in specified algorithms. +// Some callers may not know at key generation or import time what algorithms a key will be used for +// and this API allows the key to be extended for use in additional algorithms. Use of this API may +// not be compliant with FIPS 140-3. +// +// - flags must be some bitwise OR of the following flags: +// SYMCRYPT_FLAG_RSAKEY_SIGN +// SYMCRYPT_FLAG_RSAKEY_ENCRYPT + +#define SYMCRYPT_DLGROUP_FIPS_LATEST (SYMCRYPT_DLGROUP_FIPS_186_3) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupGenerate( + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_ SYMCRYPT_DLGROUP_FIPS fipsStandard, + _Inout_ PSYMCRYPT_DLGROUP pDlgroup ); +// +// Generate a Discrete Logarithm Group for use in Diffie-Hellman and DSA. +// +// - hashAlgorithm: Hash algorithm to be used for generating the group (if required by the algorithm) +// - fipsStandard: Which FIPS standard algorithm to use for generating the group. +// - pDlgroup: group object that will be initialized with a newly generated group. +// +// pDlgroup must have been created with SymCryptDlgroupAllocate() or SymCryptDlgroupCreate().
+// +// If nBitsOfQ was equal to 0 when the DLGROUP was Allocate-d/Create-d +// (and only in this case), then this function picks a default size +// for the prime Q according to the following table: +// - If nBitsOfP <= 160 then the function fails with SYMCRYPT_FIPS_FAILURE +// - If 160 < nBitsOfP <= 1024 then nBitsOfQ = 160 +// - If 1024 < nBitsOfP <= 2048 then nBitsOfQ = 256 +// - If 2048 < nBitsOfP then nBitsOfQ = 256 +// +// If fipsStandard == SYMCRYPT_DLGROUP_FIPS_NONE then no FIPS compliance is requested. +// The code defaults to SYMCRYPT_DLGROUP_FIPS_LATEST. +// +// The requirements below address the parameter values after the defaults have been substituted +// for nBitsOfQ and fipsStandard. +// +// Requirements: +// - pDlgroup!=NULL. Otherwise it returns SYMCRYPT_INVALID_ARGUMENT. +// +// - If fipsStandard == SYMCRYPT_DLGROUP_FIPS_186_2, hashAlgorithm MUST be equal to +// NULL, and nBitsOfQ <= 160 or nBitsOfQ = 0 && nBitsOfP <= 1024. +// +// - If fipsStandard == SYMCRYPT_DLGROUP_FIPS_186_3, then hashAlgorithm MUST NOT be equal +// to NULL. +// +// - If nBitsOfHash is the number of bits of the output block of hashAlgorithm, +// it is required that: +// nBitsOfQ <= nBitsOfHash <= nBitsOfP +// (where nBitsOfQ>0 was either provided by the caller of Allocate/Create +// or it was picked from the above table). +// +// - For FIPS 186-2, we have that nBitsOfHash == 160 (SHA1 output size). Therefore +// this flag can only work with nBitsOfQ up to 160 bits. Anything else will +// return SYMCRYPT_INVALID_ARGUMENT. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupSetValueSafePrime( + SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE dhSafePrimeType, + _Inout_ PSYMCRYPT_DLGROUP pDlgroup ); +// +// Sets a Discrete Logarithm Group for use in Diffie-Hellman using a named safe-prime group. +// +// - dhSafePrimeType: The type of named safe-prime group to use +// +// pDlGroup must have been created with SymCryptDlgroupAllocate() or SymCryptDlgroupCreate(). 
+// +// Selects the largest named safe-prime group that will fit in the allocated Dlgroup (based on the +// values of nBitsOfP and nBitsOfQ used in allocation). It is recommended that callers set nBitsOfQ +// to 0 in allocation (equivalent to nBitsOfQ = (nBitsOfP-1)) when creating a safe-prime group. +// +// Requirements: +// - pDlgroup was allocated with sufficient bits for the selected P (and Q) to fit. If there is no +// named safe-prime group with bit size <= the allocated size, it returns SYMCRYPT_INVALID_ARGUMENT. +// The minimum currently supported bitsize of named safe-prime groups is nBitsOfP = 2048. +// +// - dhSafePrimeType!=SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_NONE. Otherwise it returns SYMCRYPT_INVALID_ARGUMENT. +// + +BOOLEAN +SYMCRYPT_CALL +SymCryptDlgroupIsSame( + _In_ PCSYMCRYPT_DLGROUP pDlgroup1, + _In_ PCSYMCRYPT_DLGROUP pDlgroup2 ); +// +// Returns true if pDlgroup1 and pDlgroup2 have same set of P and G, false otherwise. +// + +VOID +SYMCRYPT_CALL +SymCryptDlgroupGetSizes( + _In_ PCSYMCRYPT_DLGROUP pDlgroup, + _Out_ SIZE_T* pcbPrimeP, + _Out_ SIZE_T* pcbPrimeQ, + _Out_ SIZE_T* pcbGenG, + _Out_ SIZE_T* pcbSeed ); +// +// It returns the tight byte-sizes of each parameter of the group: prime P, +// prime Q, generator G, and the FIPS domain_parameter_seed. +// +// If one of the pointers is NULL then the corresponding size is ignored. +// +// Remarks: +// - If the group has no prime Q, then the returned sizes in *pcbPrimeQ and +// *pcbSeed will be 0. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupSetValue( + _In_reads_bytes_( cbPrimeP ) PCBYTE pbPrimeP, + SIZE_T cbPrimeP, + _In_reads_bytes_( cbPrimeQ ) PCBYTE pbPrimeQ, + SIZE_T cbPrimeQ, + _In_reads_bytes_( cbGenG ) PCBYTE pbGenG, + SIZE_T cbGenG, + SYMCRYPT_NUMBER_FORMAT numFormat, + _In_opt_ PCSYMCRYPT_HASH pHashAlgorithm, + _In_reads_bytes_( cbSeed ) PCBYTE pbSeed, + SIZE_T cbSeed, + UINT32 genCounter, + SYMCRYPT_DLGROUP_FIPS fipsStandard, + _Inout_ PSYMCRYPT_DLGROUP pDlgroup ); +// +// Import key material to a DLGROUP object. +// - Prime P is NOT optional and should always be imported. +// - Prime Q is an optional parameter that may or may not be imported. If not +// the group will not have a prime Q. +// - Generator G is an optional parameter. However, if not present, the +// algorithm will generate a random G of order Q. If both Q and G are missing +// the call fails with SYMCRYPT_INVALID_ARGUMENT. +// - The parameters pHashAlgorithm, pbSeed, cbSeed and genCounter are the generation +// parameters of the FIPS standards. If fipsStandard is not equal to +// SYMCRYPT_DLGROUP_FIPS_NONE, the algorithm verifies that the input P,Q,G parameters are properly +// generated by the corresponding standard. +// If there is any discrepancy the function returns SYMCRYPT_AUTHENTICATION_FAILURE. +// Notice that these parameters are imported even if they aren't verified. +// +// Requirements: +// - The number stored in pbPrimeP and pbGenG must have at most nBitsOfP significant bits. +// Otherwise the function returns SYMCRYPT_INVALID_ARGUMENT. +// - The number stored in pbPrimeQ must have at most nBitsOfQ significant bits, where nBitsOfQ is either +// the **non-zero** value input in the call of Allocate/Create or equal to nBitsOfP if +// 0 was input. +// Otherwise the function returns SYMCRYPT_INVALID_ARGUMENT. +// - The size of the seed cbSeed must be **exactly** equal to the byte-size of the imported +// modulus Q. Otherwise the function returns SYMCRYPT_INVALID_ARGUMENT.
+// +// Remarks: +// - The buffers pbPrimeP, pbPrimeQ, pbGenG must all have the same number +// format defined by numFormat. +// - Primes P and (when provided) Q must represent prime numbers. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupGetValue( + _In_ PCSYMCRYPT_DLGROUP pDlgroup, + _Out_writes_bytes_( cbPrimeP ) PBYTE pbPrimeP, + SIZE_T cbPrimeP, + _Out_writes_bytes_( cbPrimeQ ) PBYTE pbPrimeQ, + SIZE_T cbPrimeQ, + _Out_writes_bytes_( cbGenG ) PBYTE pbGenG, + SIZE_T cbGenG, + SYMCRYPT_NUMBER_FORMAT numFormat, + _Out_ PCSYMCRYPT_HASH * ppHashAlgorithm, + _Out_writes_bytes_( cbSeed ) PBYTE pbSeed, + SIZE_T cbSeed, + _Out_ PUINT32 pGenCounter ); + +// +// Retrieve the group parameters from a DLGROUP. The buffers should be +// allocated by the caller. If a pbXXX parameter is NULL (and the cbXXX==0) +// then this parameter is not returned. +// +// Requirements: +// - All the buffers must have size at least equal to the corresponding +// size returned by SymCryptDlgroupGetSizes. For the pbSeed buffer the +// size must be **exactly** equal to the size returned from SymCryptDlgroupGetSizes. +// +// Remarks: +// - If the caller requests a Q but the group does not have one, this function +// will fail with SYMCRYPT_INVALID_BLOB. +// - The return value of *ppHashAlgorithm can be NULL if the group was generated +// by FIPS 186-2. +// + +//===================================================== +// DL flags +// +// Also see Generic key validation flags above + +// SYMCRYPT_FLAG_DLKEY_GEN_MODP: +// When set on SymCryptDlkeyGenerate call, generate a private key between 1 and P-2. 
+// When Q is known, this overrides the default behavior of generating a private key between 1 and Q-1, +// or 1 and min(2^nBitsPriv-1, Q-1) for named safe-prime groups +// When Q is not known, this does not affect the behavior +#define SYMCRYPT_FLAG_DLKEY_GEN_MODP (0x01) + +//===================================================== +// DL key operations + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeySetPrivateKeyLength( _Inout_ PSYMCRYPT_DLKEY pkDlkey, UINT32 nBitsPriv, UINT32 flags ); +// +// Sets the number of bits that this dlkey can have in its private key. +// The set value is only used when the dlkey's group is a named safe-prime dlgroup; otherwise the value +// is ignored. +// +// Requirements: +// - pkDlkey->pDlgroup->nBitsOfQ >= nBitsPriv >= pkDlkey->pDlgroup->nMinBitsPriv +// Otherwise SYMCRYPT_INVALID_ARGUMENT is returned +// +// Allowed flags: +// - None. + +PCSYMCRYPT_DLGROUP +SYMCRYPT_CALL +SymCryptDlkeyGetGroup( _In_ PCSYMCRYPT_DLKEY pkDlkey ); +// +// Returns a pointer to the dlgroup object associated with the key. +// + +UINT32 +SYMCRYPT_CALL +SymCryptDlkeySizeofPublicKey( _In_ PCSYMCRYPT_DLKEY pkDlkey ); +// +// Returns the size in bytes of a blob big enough to retrieve the public key. +// + +UINT32 +SYMCRYPT_CALL +SymCryptDlkeySizeofPrivateKey( _In_ PCSYMCRYPT_DLKEY pkDlkey ); +// +// Returns the size in bytes of a blob big enough to retrieve the private key. +// + +BOOLEAN +SYMCRYPT_CALL +SymCryptDlkeyHasPrivateKey( _In_ PCSYMCRYPT_DLKEY pkDlkey ); +// +// Returns TRUE if the pkDlkey object has a private key set. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeyGenerate( + _In_ UINT32 flags, + _Inout_ PSYMCRYPT_DLKEY pkDlkey ); +// +// Allowed flags: +// - SYMCRYPT_FLAG_DLKEY_GEN_MODP +// When set, generate a private key between 1 and P-2.
+// When Q is known, this overrides the default behavior of generating a private key between 1 and Q-1, +// or 1 and min(2^nBitsPriv-1, Q-1) for named safe-prime groups +// When Q is not known, this does not affect the behavior +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// - At least one of the flags indicating what the Dlkey is to be used for must be specified: +// SYMCRYPT_FLAG_DLKEY_DSA +// SYMCRYPT_FLAG_DLKEY_DH +// +// Note: +// If SYMCRYPT_FLAG_DLKEY_GEN_MODP is specified then SYMCRYPT_FLAG_KEY_NO_FIPS must also be +// specified to avoid SYMCRYPT_INVALID_ARGUMENT, as FIPS requires the default generation behavior +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeySetValue( + _In_reads_bytes_( cbPrivateKey ) PCBYTE pbPrivateKey, + SIZE_T cbPrivateKey, + _In_reads_bytes_( cbPublicKey ) PCBYTE pbPublicKey, + SIZE_T cbPublicKey, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_DLKEY pkDlkey ); +// +// Import key material to a DLKEY object. +// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// - SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION +// Opt-out of performing almost all validation - must be specified with SYMCRYPT_FLAG_KEY_NO_FIPS +// +// - At least one of the flags indicating what the Dlkey is to be used for must be specified: +// SYMCRYPT_FLAG_DLKEY_DSA +// SYMCRYPT_FLAG_DLKEY_DH +// +// Described in more detail in the "Flags for asymmetric key generation and import" section above +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeyGetValue( + _In_ PCSYMCRYPT_DLKEY pkDlkey, + _Out_writes_bytes_( cbPrivateKey ) + PBYTE pbPrivateKey, + SIZE_T cbPrivateKey, + _Out_writes_bytes_( cbPublicKey ) + PBYTE pbPublicKey, + SIZE_T cbPublicKey, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags ); +// +// Retrieve the public or the private key (or both) from a DLKEY. The buffers should be +// allocated by the caller. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeyExtendKeyUsage( + _Inout_ PSYMCRYPT_DLKEY pkDlkey, + UINT32 flags ); +// +// Enable an existing key which has been generated or imported to be used in specified algorithms. +// Some callers may not know at key generation or import time what algorithms a key will be used for +// and this API allows the key to be extended for use in additional algorithms. Use of this API may +// not be compliant with FIPS 140-3. +// +// - flags must be some bitwise OR of the following flags: +// SYMCRYPT_FLAG_DLKEY_DSA +// SYMCRYPT_FLAG_DLKEY_DH + +//===================================================== +// Elliptic curve operations and supported curves +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurvePrivateKeyDefaultFormat( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the private key default format of the input curve. +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveHighBitRestrictionNumOfBits( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the number of bits specified by the high bit restriction +// value of the input curve. +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveHighBitRestrictionPosition( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the position of the high bit restriction +// value of the input curve. +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveHighBitRestrictionValue( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the high bit restriction value of the input curve. +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveBitsizeofFieldModulus( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the number of bits of a field element on which +// the curve is defined. +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveBitsizeofGroupOrder( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the number of bits of the order of the subgroup generated by +// the distinguished point of the curve. 
+// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveSizeofFieldElement( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the number of bytes of a field element. It is used to +// construct buffers for setting and getting the value of elliptic curve points (most +// notably the public key of an ECKEY object). +// +// The result is equal to the cbFieldLength field of the parameters that created the curve. +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveSizeofScalarMultiplier( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the number of bytes of a scalar integer that is big enough to +// store a private key (or a multiplier of an elliptic curve point). It is used to +// construct buffers for setting and getting the value of a scalar multiplier (most +// notably the private key of an ECKEY object - see SymCryptEckeySetValue and +// SymCryptEckeyGetValue). +// +// The result is equal to sizeof( subgroupOrder * co-factor ). +// + +BOOLEAN +SYMCRYPT_CALL +SymCryptEcurveIsSame( + _In_ PCSYMCRYPT_ECURVE pCurve1, + _In_ PCSYMCRYPT_ECURVE pCurve2); +// +// Returns true if pCurve1 and pCurve2 have same type, P, A, and B - false otherwise. +// +// Note: This does not check that the curves have the same G set, callers may additionally +// consider calling SymCryptEcpointIsEqual to compare the curves' distinguished points. 
+// + +// Internally supported curves +extern const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNistP192; +extern const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNistP224; +extern const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNistP256; +extern const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNistP384; +extern const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNistP521; + +extern const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNumsP256t1; +extern const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNumsP384t1; +extern const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNumsP512t1; + +extern const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsCurve25519; + +typedef enum _SYMCRYPT_ECURVE_ID +{ + SYMCRYPT_ECURVE_ID_NULL = 0, + SYMCRYPT_ECURVE_ID_NIST_P192 = 1, + SYMCRYPT_ECURVE_ID_NIST_P224 = 2, + SYMCRYPT_ECURVE_ID_NIST_P256 = 3, + SYMCRYPT_ECURVE_ID_NIST_P384 = 4, + SYMCRYPT_ECURVE_ID_NIST_P521 = 5, + SYMCRYPT_ECURVE_ID_NUMS_P256T1 = 6, + SYMCRYPT_ECURVE_ID_NUMS_P384T1 = 7, + SYMCRYPT_ECURVE_ID_NUMS_P512T1 = 8, + SYMCRYPT_ECURVE_ID_CURVE25519 = 9 +} SYMCRYPT_ECURVE_ID; + +PCSYMCRYPT_ECURVE_PARAMS +SYMCRYPT_CALL +SymCryptGetEcurveParams( SYMCRYPT_ECURVE_ID ecurveId ); +// +// Returns a pointer to the elliptic curve parameters structure for the specified curve ID. +// Returns NULL if the curve ID is invalid. +// + +//===================================================== +// ECC flags +// +// Also see Generic key validation flags above + +// SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION: This flag applies only to the ECDSA algorithm. When set, the sign +// and verify algorithms will not do hash truncation. The caller can use their own truncation method in such case. 
+// (default: according to the ECDSA standard) +#define SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION (0x08) + +//===================================================== +// EC key operations + +UINT32 +SYMCRYPT_CALL +SymCryptEckeySizeofPublicKey( + _In_ PCSYMCRYPT_ECKEY pkEckey, + _In_ SYMCRYPT_ECPOINT_FORMAT ecPointFormat ); +// +// Returns the size in bytes of a blob big enough to retrieve the public key in +// the specified ECPOINT format. +// + +UINT32 +SYMCRYPT_CALL +SymCryptEckeySizeofPrivateKey( _In_ PCSYMCRYPT_ECKEY pkEckey ); +// +// Returns the size in bytes of a blob big enough to retrieve the private key. +// It is equal to SymCryptEcurveSizeofScalarMultiplier( pCurve ) where pCurve is the +// curve that created the key. +// + +BOOLEAN +SYMCRYPT_CALL +SymCryptEckeyHasPrivateKey( _In_ PCSYMCRYPT_ECKEY pkEckey ); +// +// Returns TRUE if the pkEckey object has a private key set. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEckeySetValue( + _In_reads_bytes_( cbPrivateKey ) PCBYTE pbPrivateKey, + SIZE_T cbPrivateKey, + _In_reads_bytes_( cbPublicKey ) PCBYTE pbPublicKey, + SIZE_T cbPublicKey, + SYMCRYPT_NUMBER_FORMAT numFormat, + SYMCRYPT_ECPOINT_FORMAT ecPointFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_ECKEY pEckey ); +// +// Import key material to an ECKEY object. +// +// Requirements: +// (pbPrivateKey, cbPrivateKey): a buffer that contains the private key, encoded +// in the format specified by the numFormat parameter. +// Note that the integer encoded in (pbPrivateKey, cbPrivateKey) is taken modulo the order of the +// subgroup generated by the curve generator. Callers that want a uniform private key value +// should ensure that the input is uniform in the range [1..GOrd-1]. +// +// Requirements: cbPrivateKey == SymCryptEckeySizeofPrivateKey( pEckey ) +// +// If pbPrivateKey == NULL && cbPrivateKey == 0, then no private key is imported, and the +// resulting ECKEY object will not have a private key. 
+// +// (pbPublicKey, cbPublicKey): buffer that contains the public key, encoded in the format +// specified by the format parameter, the buffer length, and the curve properties. +// +// Requirements: cbPublicKey == SymCryptEckeySizeofPublicKey( pEckey, ecPointFormat ) +// +// If no public key is presented (pbPublicKey == NULL && cbPublicKey == 0) then the public +// key is computed from the provided private key. +// +// At least one of the public and private keys must be provided. +// +// If both are provided, then they must match. +// +// The algorithm always sets the corresponding public key +// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// - SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION +// Opt-out of performing almost all validation - must be specified with SYMCRYPT_FLAG_KEY_NO_FIPS +// +// - At least one of the flags indicating what the Eckey is to be used for must be specified: +// SYMCRYPT_FLAG_ECKEY_ECDSA +// SYMCRYPT_FLAG_ECKEY_ECDH +// +// Described in more detail in the "Flags for asymmetric key generation and import" section above +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEckeySetRandom( + _In_ UINT32 flags, + _Inout_ PSYMCRYPT_ECKEY pEckey ); +// +// Generates a new Eckey public/private key pair using the specified curve. The public key +// is a uniformly random non-zero point of the subgroup generated by the distinguished point +// of the curve. This complies with the FIPS 186-4 standard. +// +// Remarks: +// - In the case that the highbit restrictions on the curve are unsatisfiable, i.e. +// there is no private key smaller than the group order it returns +// SYMCRYPT_INVALID_ARGUMENT. 
+// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// - At least one of the flags indicating what the Eckey is to be used for must be specified: +// SYMCRYPT_FLAG_ECKEY_ECDSA +// SYMCRYPT_FLAG_ECKEY_ECDH + +// Described in more detail in the "Flags for asymmetric key generation and import" section above +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEckeyGetValue( + _In_ PCSYMCRYPT_ECKEY pEckey, + _Out_writes_bytes_( cbPrivateKey ) + PBYTE pbPrivateKey, + SIZE_T cbPrivateKey, + _Out_writes_bytes_( cbPublicKey ) + PBYTE pbPublicKey, + SIZE_T cbPublicKey, + SYMCRYPT_NUMBER_FORMAT numFormat, + SYMCRYPT_ECPOINT_FORMAT ecPointFormat, + UINT32 flags ); +// +// Retrieve the public or the private key (or both) from an ECKEY. The buffers should be +// allocated by the caller. +// +// If (pbPrivateKey != NULL), then the function will return the private key in pbPrivateKey +// in the format specified by the numFormat parameter **as long as** the following two +// requirements are satisfied: +// 1. cbPrivateKey >= SymCryptEckeySizeofPrivateKey( pEckey ) +// 2. pEckey contains a private key part (If this fails the function returns SYMCRYPT_INVALID_BLOB) +// If (pbPrivateKey == NULL) and (cbPrivateKey == 0), then these parameters are ignored +// and no private key is returned. +// +// If (pbPublicKey != NULL), then the function will return the public key in pbPublicKey +// in the format specified by the numFormat and the ecPointFormat parameters +// **as long as** the following requirement is satisfied: +// 1. cbPublicKey >= SymCryptEckeySizeofPublicKey( pEckey, ecPointFormat ) +// If (pbPublicKey == NULL) and (cbPublicKey == 0), then these parameters are ignored +// and no public key is returned. +// +// Allowed flags: +// - None.
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEckeyExtendKeyUsage( + _Inout_ PSYMCRYPT_ECKEY pEckey, + UINT32 flags ); +// +// Enable an existing key which has been generated or imported to be used in specified algorithms. +// Some callers may not know at key generation or import time what algorithms a key will be used for +// and this API allows the key to be extended for use in additional algorithms. Use of this API may +// not be compliant with FIPS 140-3 +// +// - flags must be some bitwise OR of the following flags: +// SYMCRYPT_FLAG_ECKEY_ECDSA +// SYMCRYPT_FLAG_ECKEY_ECDH + +/************************ + * Crypto algorithm API * + ************************/ + +// +// The Crypto algorithm API implements various cryptographic algorithms that use large-integer arithmetic. +// + +// +// RSA Encryption Algorithms +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaRawEncrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); +// +// This function encrypts the buffer pbSrc (of size cbSrc bytes) under the pkRsakey key using textbook RSA. +// The output is stored in the pbDst buffer (of size cbDst bytes). +// For in place encryption pbSrc = pbDst. +// +// Both input and output buffers store a number in the number format numFormat. +// +// Requirements: +// - If cbDst is too small for the result then SYMCRYPT_BUFFER_TOO_SMALL is returned. +// Safe size is cbDst = SymCryptRsakeySizeofModulus(pkRsakey). +// - The number stored in the pbSrc buffer must be strictly smaller than the value +// of the public modulus in pkRsakey. 
+// +// Allowed flags: +// None +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaRawDecrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); +// +// This function decrypts the buffer pbSrc (of size cbSrc bytes) with the pkRsakey key using textbook RSA. +// The output is stored in the pbDst buffer (of size cbDst bytes). +// For in place decryption pbSrc = pbDst. +// +// Both input and output buffers store a number in the number format numFormat. +// +// Requirements: +// - If cbDst is too small for the result then SYMCRYPT_BUFFER_TOO_SMALL is returned. +// Safe size is cbDst = SymCryptRsakeySizeofModulus(pkRsakey). +// - The number stored in the pbSrc buffer must be strictly smaller than the value +// of the public modulus in pkRsakey. +// - The RSAKEY pkRsakey must have a private key part. Otherwise SYMCRYPT_INVALID_ARGUMENT is returned. +// +// Allowed flags: +// None +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1Encrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + UINT32 flags, + SYMCRYPT_NUMBER_FORMAT nfDst, + _Out_writes_opt_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T *pcbDst ); +// +// This function encrypts the buffer pbSrc under the pkRsakey key using RSA PKCS1 v1.5. +// The output is stored in the pbDst buffer and the number of bytes written in *pcbDst. +// +// If pbDst == NULL then only the *pcbDst is output. +// +// nfDst is the number format of the ciphertext (i.e. the pbDst buffer). +// +// Allowed flags: +// None +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1Decrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT nfSrc, + UINT32 flags, + _Out_writes_opt_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T *pcbDst ); +// +// Perform an RSA-PKCS1 decryption. 
+// - pbSrc/cbSrc: source buffer +// - nfSrc: format of source buffer +// - flags: must be 0 +// - pbDst/cbDst: destination buffer +// - pcbDst: receives the size of the decrypted data. +// +// If the data is improperly formatted, an error is returned. +// If pbDst == NULL, then *pcbDst is set to the decrypted data length, and the function succeeds. +// This is not recommended as retrieving the actual data requires a second RSA decryption, +// which is expensive. We recommend that callers provide a large enough buffer the first time. +// If pbDst != NULL and cbDst is too small, then *pcbDst is set to the required size of pbDst +// and SYMCRYPT_BUFFER_TOO_SMALL is returned. +// +// Allowed flags: +// None +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaOaepEncrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_reads_bytes_( cbLabel ) PCBYTE pbLabel, + SIZE_T cbLabel, + UINT32 flags, + SYMCRYPT_NUMBER_FORMAT nfDst, + _Out_writes_opt_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T *pcbDst ); +// +// This function encrypts the buffer pbSrc under the pkRsakey key using RSA OAEP. +// The output is stored in the pbDst buffer and the number of bytes written in *pcbDst. +// +// If pbDst == NULL then only the *pcbDst is output. +// +// nfDst is the number format of the ciphertext (i.e. the pbDst buffer). +// +// Allowed flags: +// None +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaOaepDecrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT nfSrc, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_reads_bytes_( cbLabel ) PCBYTE pbLabel, + SIZE_T cbLabel, + UINT32 flags, + _Out_writes_opt_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T *pcbDst ); +// +// This function decrypts the buffer pbSrc with the pkRsakey key using RSA OAEP.
+// The output is stored in the pbDst buffer and the number of bytes written in *pcbDst. +// +// If pbDst == NULL then only the *pcbDst is output. +// +// nfSrc is the number format of the ciphertext (i.e. the pbSrc buffer). +// +// Requirement: +// - cbSrc <= SymCryptRsakeySizeofModulus( pkRsakey ). Otherwise the function +// returns SYMCRYPT_INVALID_ARGUMENT. +// +// Allowed flags: +// None +// + +// +// RSA Signing Algorithms +// + +#define SYMCRYPT_FLAG_RSA_PKCS1_NO_ASN1 (0x01) +#define SYMCRYPT_FLAG_RSA_PKCS1_OPTIONAL_HASH_OID (0x02) + +#define SYMCRYPT_FLAG_RSA_PSS_VERIFY_WITH_MINIMUM_SALT (0x04) + +// +// SYMCRYPT_FLAG_RSA_PKCS1_NO_ASN1: For RSA PKCS1 to not use the OID on signing or verifying. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1Sign( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_ PCSYMCRYPT_OID pHashOIDs, + _In_ SIZE_T nOIDCount, + UINT32 flags, + SYMCRYPT_NUMBER_FORMAT nfSignature, + _Out_writes_opt_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature, + _Out_ SIZE_T *pcbSignature ); +// +// This function signs a message (its hash value is stored in pbHashValue) with +// the pkRsakey key using RSA PKCS1 v1.5. The signature is stored in the pbSignature +// buffer and the number of bytes written in *pcbSignature. +// +// pHashOIDs points to an array of SYMCRYPT_OID and the array size is nOIDCount +// +// If pbSignature == NULL then only the *pcbSignature is output. +// +// nfSignature is the number format of the signature (i.e. the pbSignature buffer). Currently +// only SYMCRYPT_NUMBER_FORMAT_MSB_FIRST is supported. 
+// +// Allowed flags: +// SYMCRYPT_FLAG_RSA_PKCS1_NO_ASN1 +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1Verify( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + SYMCRYPT_NUMBER_FORMAT nfSignature, + _In_reads_opt_( nOIDCount ) PCSYMCRYPT_OID pHashOID, + _In_ SIZE_T nOIDCount, + UINT32 flags ); +// +// This function verifies the signature of a message (its hash value is input in +// pbHashValue) with the pkRsakey key using RSA PKCS1 v1.5. The signature is input +// in the pbSignature buffer. +// +// pHashOID points to an array of SYMCRYPT_OID and the array size is nOIDCount +// +// It returns SYMCRYPT_NO_ERROR if the verification succeeded or SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE +// if it failed. +// +// nfSignature is the number format of the signature (i.e. the pbSignature buffer). Currently +// only SYMCRYPT_NUMBER_FORMAT_MSB_FIRST is supported. +// +// Allowed flags: +// SYMCRYPT_FLAG_RSA_PKCS1_OPTIONAL_HASH_OID +// +// When the flag is set, this function will do signature verification by not using hash OID when needed +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPssSign( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + SIZE_T cbSalt, + UINT32 flags, + SYMCRYPT_NUMBER_FORMAT nfSignature, + _Out_writes_opt_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature, + _Out_ SIZE_T *pcbSignature ); +// +// Sign a message using RSA-PSS +// - pkRsakey: Key to sign with; must contain a private key +// - pbHashValue/cbHashValue: Value to sign +// - hashAlgorithm: Hash algorithm to use in the MGF of PSS +// - cbSalt: # bytes of salt to use (typically equal to size of hash value) +// - flags: must be 0 +// - nfSignature: Number format of signature.
Typically SYMCRYPT_NUMBER_FORMAT_MSB_FIRST +// - pbSignature/cbSignature: buffer that receives the signature. +// If pbSignature == NULL, only *pcbSignature is returned. +// Note: pbSignature receives an integer, so if the buffer is larger than the modulus size +// it will be padded with zeroes. For MSB-first format the zeroes are at the start of the buffer. +// Typically this buffer is the same size as the RSA modulus. +// - pcbSignature: receives the size of the signature. +// +// Return value: +// If cbHashValue + cbSalt is too large (above modulus size minus 2 or 3 depending on details) then +// signature generation fails. +// +// Allowed flags: +// None +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPssVerify( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + SYMCRYPT_NUMBER_FORMAT nfSignature, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + SIZE_T cbSalt, + UINT32 flags ); +// +// This function verifies the signature of a message (its hash value is input in +// pbHashValue) with the pkRsakey key using RSA PSS. The signature is input +// in the pbSignature buffer. +// +// It returns SYMCRYPT_NO_ERROR if the verification succeeded or SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE +// if it failed. +// +// nfSignature is the number format of the signature (i.e. the pbSignature buffer). Currently +// only SYMCRYPT_NUMBER_FORMAT_MSB_FIRST is supported. +// +// Requirements: +// - cbHashValue <= SymCryptRsakeySizeofModulus( pkRsakey ) +// - cbSalt <= SymCryptRsakeySizeofModulus( pkRsakey ) +// - cbSignature <= SymCryptRsakeySizeofModulus( pkRsakey ) +// +// Allowed flags: +// SYMCRYPT_FLAG_RSA_PSS_VERIFY_WITH_MINIMUM_SALT +// +// When the flag is set, this function will do signature verification using the cbSalt parameter as +// a minimum value for the salt length, rather than using it as an exact value.
Specifying this and +// setting cbSalt = 0 allows callers to verify a signature which has a valid encoding with any salt +// length using a single call. +// + +VOID +SYMCRYPT_CALL +SymCryptRsaSelftest(void); +// +// FIPS self-test for RSA sign/verify. This function uses a hardcoded key to perform the self-test +// without having to generate a key. If the self-test fails, SymCryptFatal will be called to +// fastfail. +// The self-test will automatically be performed before first operational use of RSA if using a key +// with FIPS validation, so most callers should never use this function. +// + + +// +// DSA +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDsaSign( + _In_ PCSYMCRYPT_DLKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ); +// +// Sign a message using the DSA signature algorithm. +// (pbHashValue,cbHashValue) is the output of the hash function that hashed the message to be signed. +// (pbSignature,cbSignature) is the output buffer that receives the signature. +// The signature is encoded as two integers (R,S) mod Q in the format specified by the 'format' parameter. +// +// Allowed flags: +// None +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDsaVerify( + _In_ PCSYMCRYPT_DLKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags ); +// +// Verifies a DSA signature using the public part of Key. +// +// It returns SYMCRYPT_NO_ERROR if the verification succeeded or SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE +// if it failed. +// +// Allowed flags: +// None +// + +VOID +SYMCRYPT_CALL +SymCryptDsaSelftest(void); +// +// FIPS self-test for DSA sign/verify. This function uses a hardcoded key to perform the self-test +// without having to generate a key. 
If the self-test fails, SymCryptFatal will be called to +// fastfail. +// The self-test will automatically be performed before first operational use of DSA if using a key +// with FIPS validation, so most callers should never use this function. +// + +// +// DH +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDhSecretAgreement( + _In_ PCSYMCRYPT_DLKEY pkPrivate, + _In_ PCSYMCRYPT_DLKEY pkPublic, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret ); +// +// Calculates the agreed secret of a DH key exchange and stores it +// in the pbAgreedSecret buffer under the specified number format. +// +// format is the number format of the agreed secret (pbAgreedSecret buffer). +// +// Allowed flags: +// - None +// + +VOID +SYMCRYPT_CALL +SymCryptDhSecretAgreementSelftest(void); +// +// FIPS self-test for DH secret agreement. This function uses two hardcoded keys and a precalculated +// known answer to perform the self-test without having to generate a key. If the self-test fails, +// SymCryptFatal will be called to fastfail. +// The self-test will automatically be performed before first operational use of DH if using keys +// with FIPS validation, so most callers should never use this function. +// + +// +// For both ECDSA and ECDH algorithms the key generation and management is the same. The main algorithms are: +// - SymCryptEckeyAllocate or SymCryptEckeyCreate for creation of the ECKEY object. +// - SymCryptEckeySetValue or SymCryptEckeySetRandom for filling the key with the preferred key material. +// - SymCryptEckeyFree or SymCryptEckeyWipe for freeing or wiping the key. 
+// + +// +// ECDSA +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDsaSign( + _In_ PCSYMCRYPT_ECKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ); +// +// Sign a message using the ECDSA signature algorithm. +// (pbHashValue,cbHashValue) is the output of the hash function that hashed the message to be signed. +// (pbSignature,cbSignature) is the output buffer that receives the signature. +// The signature is encoded as two integers in the format specified by the 'format' parameter. +// +// Allowed flags: +// SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION: If set then the hash value will +// not be truncated. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDsaVerify( + _In_ PCSYMCRYPT_ECKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags ); + +// +// Verifies an ECDSA signature using the public part of Key. +// +// It returns SYMCRYPT_NO_ERROR if the verification succeeded or SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE +// if it failed. +// +// Allowed flags: +// SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION: If set then the hash value will +// not be truncated. + +VOID +SYMCRYPT_CALL +SymCryptEcDsaSelftest(void); +// +// FIPS self-test for ECDSA sign/verify. This function uses a hardcoded key to perform the self-test +// without having to generate a key. If the self-test fails, SymCryptFatal will be called to +// fastfail. +// The self-test will automatically be performed before first operational use of ECDSA if using a +// key with FIPS validation, so most callers should never use this function. 
+// + +// +// ECDH +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDhSecretAgreement( + _In_ PCSYMCRYPT_ECKEY pkPrivate, + _In_ PCSYMCRYPT_ECKEY pkPublic, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret ); + +// +// Calculates the agreed secret of a DH key exchange and stores it +// in the pbAgreedSecret buffer under the specified number format. +// +// Allowed flags: +// - None +// + +VOID +SYMCRYPT_CALL +SymCryptEcDhSecretAgreementSelftest(void); +// +// FIPS self-test for ECDH secret agreement. This function uses two hardcoded keys and a +// precalculated known answer to perform the self-test without having to generate a key. If the +// self-test fails, SymCryptFatal will be called to fastfail. +// The self-test will automatically be performed before first operational use of ECDH if using keys +// with FIPS validation, so most callers should never use this function. +// + +//======================================================================== +// +// Stateful Hash-based Signatures +// +// Hash-based signature schemes are digital signature schemes built out of hash +// functions. Stateful hash-based signatures are many-time signature schemes +// composed of a one-time-signature (OTS) scheme and a Merkle-tree representing +// multiple OTS with a public root value. At each signing operation, one of the +// (unused) OTS keys is used to sign the message, and the private key is updated +// so that the same OTS is not used again. Because there is a limited number of +// OTS keys determined at key generation time, signing cannot be performed after +// all OTSs are used. This is an important distinction from other digital signature +// schemes such as RSA or ECDSA. +// +// It is crucial for the security of the *stateful* hash-based signatures that the +// same private key state NOT be used more than once to sign messages, otherwise all +// security is lost. 
+// + + +//======================================================================== +// XMSS API +// +// XMSS is a stateful hash-based signature scheme specified in RFC 8391. The +// multi-tree variant is named XMSS^MT. +// +// XMSS uses WOTS+ as the one-time-signature (OTS) scheme. Public key consists +// of two parts; Merkle-tree hash of OTS public keys called the Root, and a Seed value +// used in hash computations. The private key consists of SK_XMSS which is +// used to deterministically create OTS keys, SK_PRF which is used to generate +// the randomizer for hashing, and an integer Idx which is used to select the next OTS key +// for signing. +// + +typedef enum _SYMCRYPT_XMSS_ALGID +{ + // Hash Fn. RFC-8391 SP800-208 + SYMCRYPT_XMSS_SHA2_10_256 = 0x00000001, // SHA-256 X X + SYMCRYPT_XMSS_SHA2_16_256 = 0x00000002, // SHA-256 X X + SYMCRYPT_XMSS_SHA2_20_256 = 0x00000003, // SHA-256 X X + SYMCRYPT_XMSS_SHA2_10_512 = 0x00000004, // SHA-512 X + SYMCRYPT_XMSS_SHA2_16_512 = 0x00000005, // SHA-512 X + SYMCRYPT_XMSS_SHA2_20_512 = 0x00000006, // SHA-512 X + SYMCRYPT_XMSS_SHAKE_10_256 = 0x00000007, // SHAKE128 X + SYMCRYPT_XMSS_SHAKE_16_256 = 0x00000008, // SHAKE128 X + SYMCRYPT_XMSS_SHAKE_20_256 = 0x00000009, // SHAKE128 X + SYMCRYPT_XMSS_SHAKE_10_512 = 0x0000000A, // SHAKE256 X + SYMCRYPT_XMSS_SHAKE_16_512 = 0x0000000B, // SHAKE256 X + SYMCRYPT_XMSS_SHAKE_20_512 = 0x0000000C, // SHAKE256 X + SYMCRYPT_XMSS_SHA2_10_192 = 0x0000000D, // SHA-256 X + SYMCRYPT_XMSS_SHA2_16_192 = 0x0000000E, // SHA-256 X + SYMCRYPT_XMSS_SHA2_20_192 = 0x0000000F, // SHA-256 X + SYMCRYPT_XMSS_SHAKE256_10_256 = 0x00000010, // SHAKE256 X + SYMCRYPT_XMSS_SHAKE256_16_256 = 0x00000011, // SHAKE256 X + SYMCRYPT_XMSS_SHAKE256_20_256 = 0x00000012, // SHAKE256 X + SYMCRYPT_XMSS_SHAKE256_10_192 = 0x00000013, // SHAKE256 X + SYMCRYPT_XMSS_SHAKE256_16_192 = 0x00000014, // SHAKE256 X + SYMCRYPT_XMSS_SHAKE256_20_192 = 0x00000015, // SHAKE256 X + +} SYMCRYPT_XMSS_ALGID; + +typedef enum _SYMCRYPT_XMSSMT_ALGID
+{ + // Hash Fn. RFC-8391 SP800-208 + // SHA-256 X X + SYMCRYPT_XMSSMT_SHA2_20_2_256 = 0x00000001, + SYMCRYPT_XMSSMT_SHA2_20_4_256 = 0x00000002, + SYMCRYPT_XMSSMT_SHA2_40_2_256 = 0x00000003, + SYMCRYPT_XMSSMT_SHA2_40_4_256 = 0x00000004, + SYMCRYPT_XMSSMT_SHA2_40_8_256 = 0x00000005, + SYMCRYPT_XMSSMT_SHA2_60_3_256 = 0x00000006, + SYMCRYPT_XMSSMT_SHA2_60_6_256 = 0x00000007, + SYMCRYPT_XMSSMT_SHA2_60_12_256 = 0x00000008, + + // SHA-512 X + SYMCRYPT_XMSSMT_SHA2_20_2_512 = 0x00000009, + SYMCRYPT_XMSSMT_SHA2_20_4_512 = 0x0000000A, + SYMCRYPT_XMSSMT_SHA2_40_2_512 = 0x0000000B, + SYMCRYPT_XMSSMT_SHA2_40_4_512 = 0x0000000C, + SYMCRYPT_XMSSMT_SHA2_40_8_512 = 0x0000000D, + SYMCRYPT_XMSSMT_SHA2_60_3_512 = 0x0000000E, + SYMCRYPT_XMSSMT_SHA2_60_6_512 = 0x0000000F, + SYMCRYPT_XMSSMT_SHA2_60_12_512 = 0x00000010, + + // SHAKE128 X + SYMCRYPT_XMSSMT_SHAKE_20_2_256 = 0x00000011, + SYMCRYPT_XMSSMT_SHAKE_20_4_256 = 0x00000012, + SYMCRYPT_XMSSMT_SHAKE_40_2_256 = 0x00000013, + SYMCRYPT_XMSSMT_SHAKE_40_4_256 = 0x00000014, + SYMCRYPT_XMSSMT_SHAKE_40_8_256 = 0x00000015, + SYMCRYPT_XMSSMT_SHAKE_60_3_256 = 0x00000016, + SYMCRYPT_XMSSMT_SHAKE_60_6_256 = 0x00000017, + SYMCRYPT_XMSSMT_SHAKE_60_12_256 = 0x00000018, + + // SHAKE256 X + SYMCRYPT_XMSSMT_SHAKE_20_2_512 = 0x00000019, + SYMCRYPT_XMSSMT_SHAKE_20_4_512 = 0x0000001A, + SYMCRYPT_XMSSMT_SHAKE_40_2_512 = 0x0000001B, + SYMCRYPT_XMSSMT_SHAKE_40_4_512 = 0x0000001C, + SYMCRYPT_XMSSMT_SHAKE_40_8_512 = 0x0000001D, + SYMCRYPT_XMSSMT_SHAKE_60_3_512 = 0x0000001E, + SYMCRYPT_XMSSMT_SHAKE_60_6_512 = 0x0000001F, + SYMCRYPT_XMSSMT_SHAKE_60_12_512 = 0x00000020, + + // SHA-256 X + SYMCRYPT_XMSSMT_SHA2_20_2_192 = 0x00000021, + SYMCRYPT_XMSSMT_SHA2_20_4_192 = 0x00000022, + SYMCRYPT_XMSSMT_SHA2_40_2_192 = 0x00000023, + SYMCRYPT_XMSSMT_SHA2_40_4_192 = 0x00000024, + SYMCRYPT_XMSSMT_SHA2_40_8_192 = 0x00000025, + SYMCRYPT_XMSSMT_SHA2_60_3_192 = 0x00000026, + SYMCRYPT_XMSSMT_SHA2_60_6_192 = 0x00000027, + SYMCRYPT_XMSSMT_SHA2_60_12_192 = 0x00000028, + + // SHAKE256 
X + SYMCRYPT_XMSSMT_SHAKE256_20_2_256 = 0x00000029, + SYMCRYPT_XMSSMT_SHAKE256_20_4_256 = 0x0000002A, + SYMCRYPT_XMSSMT_SHAKE256_40_2_256 = 0x0000002B, + SYMCRYPT_XMSSMT_SHAKE256_40_4_256 = 0x0000002C, + SYMCRYPT_XMSSMT_SHAKE256_40_8_256 = 0x0000002D, + SYMCRYPT_XMSSMT_SHAKE256_60_3_256 = 0x0000002E, + SYMCRYPT_XMSSMT_SHAKE256_60_6_256 = 0x0000002F, + SYMCRYPT_XMSSMT_SHAKE256_60_12_256 = 0x00000030, + + // SHAKE256 X + SYMCRYPT_XMSSMT_SHAKE256_20_2_192 = 0x00000031, + SYMCRYPT_XMSSMT_SHAKE256_20_4_192 = 0x00000032, + SYMCRYPT_XMSSMT_SHAKE256_40_2_192 = 0x00000033, + SYMCRYPT_XMSSMT_SHAKE256_40_4_192 = 0x00000034, + SYMCRYPT_XMSSMT_SHAKE256_40_8_192 = 0x00000035, + SYMCRYPT_XMSSMT_SHAKE256_60_3_192 = 0x00000036, + SYMCRYPT_XMSSMT_SHAKE256_60_6_192 = 0x00000037, + SYMCRYPT_XMSSMT_SHAKE256_60_12_192 = 0x00000038, + +} SYMCRYPT_XMSSMT_ALGID; + + +typedef enum _SYMCRYPT_XMSSKEY_TYPE +{ + SYMCRYPT_XMSSKEY_TYPE_NONE = 0, + SYMCRYPT_XMSSKEY_TYPE_PUBLIC = 1, // Key object contains only public key + SYMCRYPT_XMSSKEY_TYPE_PRIVATE = 2, // Key object contains both public key and private key +} SYMCRYPT_XMSSKEY_TYPE; + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssParamsFromAlgId( + SYMCRYPT_XMSS_ALGID id, + _Out_ PSYMCRYPT_XMSS_PARAMS pParams); +// +// Populate SYMCRYPT_XMSS_PARAMS structure for the specified XMSS algorithm identifier +// using the predefined parameter sets from RFC 8391 and NIST SP800-208 +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssMtParamsFromAlgId( + SYMCRYPT_XMSSMT_ALGID id, + _Out_ PSYMCRYPT_XMSS_PARAMS pParams); +// +// Populate SYMCRYPT_XMSS_PARAMS structure for the specified XMSS^MT algorithm identifier +// using the predefined parameter sets from RFC 8391 and NIST SP800-208 +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssSetParams( + _Out_ PSYMCRYPT_XMSS_PARAMS pParams, + UINT32 id, // algorithm identifier + _In_ PCSYMCRYPT_HASH pHash, // hash algorithm + UINT32 cbHashOutput, // hash output size + UINT32 nWinternitzWidth, // Winternitz 
parameter (width of digits) + UINT32 nTotalTreeHeight, // total tree height + UINT32 nLayers, // number of levels + UINT32 cbPrefix // domain separator prefix length + ); +// +// Populates SYMCRYPT_XMSS_PARAMS structure by user defined parameters +// +// +// Parameters: +// +// pParams. Pointer to the structure that will be populated with the +// supplied parameters. +// +// id. Algorithm identifier, will be embedded in key and signature objects. +// +// pHash. Pointer to a hash object that implements a hash function which +// will be used in XMSS/XMSS^MT operations. +// +// cbHashOutput. Output size of the hash function in bytes. Leading cbHashOutput +// bytes are taken as hash output if the hash algorithm's actual output size is larger. +// +// nWinternitzWidth. Winternitz parameter, width of digits in byte sequences. +// See remark below for more explanation. +// +// nTotalTreeHeight. Height of the XMSS/XMSS^MT tree. In a multi-tree setting, +// it is the sum of the tree heights of each layer. +// +// nLayers. Number of layers. For XMSS nLayers=1, otherwise nLayers > 1. When nLayers > 1, +// it must divide nTotalTreeHeight without remainder, so that each layer has height +// nTotalTreeHeight/nLayers. +// +// cbPrefix. Number of bytes in the prefix to the hash inputs used to domain separate +// PRF functions. +// +// Requirements: +// +// cbHashOutput must be nonzero, must be less than or equal to pHash->resultSize, +// and must be less than or equal to SYMCRYPT_HASH_MAX_RESULT_SIZE +// +// nWinternitzWidth must be one of 1, 2, 4, or 8 +// +// nTotalTreeHeight must be non-zero, it must be less than or equal to 32 for +// single-tree (nLayers = 1), and must be less than 64 for multi-tree (nLayers > 1) +// +// nLayers must be non-zero and must divide nTotalTreeHeight without remainder +// +// cbPrefix must be non-zero +// +// Remarks: +// +// RFC 8391 specifies w as the length of the Winternitz chains. 
Here, +// it is used as the width of the digits in an octet string, i.e., +// base2 logarithm of the chain length, which is similar to its use +// in LMS/HSS in RFC 8554. +// + + +#define SYMCRYPT_FLAG_XMSSKEY_VERIFY_ROOT (0x00000001) +// Verifies the public root value when importing a private key + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssSizeofKeyBlobFromParams( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + SYMCRYPT_XMSSKEY_TYPE keyType, + _Out_ SIZE_T* pcbKey ); +// +// Return the size of an XMSS/XMSS^MT key blob associated with the provided XMSS parameters +// +// +// Parameters: +// +// pParams. Pointer to an XMSS parameters structure that has been properly +// initialized before this call. +// +// keyType. SYMCRYPT_XMSSKEY_TYPE_PUBLIC (resp. SYMCRYPT_XMSSKEY_TYPE_PRIVATE) to +// retrieve the size of the public key (resp. private key) blob. +// +// pcbKey. Pointer to the variable to store the size of a public/private +// key blob associated with the XMSS parameters. +// + +PSYMCRYPT_XMSS_KEY +SYMCRYPT_CALL +SymCryptXmsskeyAllocate( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + UINT32 flags ); +// +// Allocate an XMSS/XMSS^MT key object and initialize it +// +// After this call, the key object does not contain a key yet. It must be +// followed by a call to SymCryptXmsskeyGenerate or SymCryptXmsskeySetValue. +// +// Allowed flags: +// +// No flags defined for this function +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmsskeyGenerate( + _Inout_ PSYMCRYPT_XMSS_KEY pKey, + UINT32 flags ); +// +// Generate a public/private XMSS/XMSS^MT key-pair +// +// Parameters: +// +// pKey. Key object to store the public/private key-pair +// +// flags. 
No flags defined for this function +// +// Return values: +// +// - SYMCRYPT_NO_ERROR +// On successful key generation +// +// - SYMCRYPT_MEMORY_ALLOCATION_FAILURE +// If there is not enough memory to perform key generation +// +// Remarks: +// +// - Generates a random private key (SK_XMSS, SK_PRF) and a random +// public seed SEED, and computes the public value Root from it. +// - If the function fails, the key object will be in an invalid state. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmsskeySetValue( + _In_reads_bytes_( cbInput ) PCBYTE pbInput, + SIZE_T cbInput, + SYMCRYPT_XMSSKEY_TYPE keyType, + UINT32 flags, + _Inout_ PSYMCRYPT_XMSS_KEY pKey ); +// +// Set an XMSS/XMSS^MT public/private key from key blob +// +// Key formats: +// +// PubKey: algId | Root | Seed +// PrvKey: algId | Root | Seed | Idx | SK_XMSS | SK_PRF +// +// algId and Idx are 32-bit and 64-bit integers respectively, stored in big-endian format. +// Other values are n-bytes where n is the output size (in bytes) of the hash +// algorithm (or the truncated size if the hash output is truncated). +// +// Public-key format is specified in RFC 8391, whereas private-key format is not. +// We define the private-key as an extension of the public-key with the private key +// material. +// +// Parameters: +// +// (pbInput, cbInput). Input key blob to import the key from +// +// keyType. Indicates whether (pbInput, cbInput) contains a public or a private key. +// Must be one of SYMCRYPT_XMSSKEY_TYPE_PUBLIC, or SYMCRYPT_XMSSKEY_TYPE_PRIVATE. +// +// flags. See below +// +// pKey. Pointer to the XMSS key object to be initialized from the key blob +// +// Allowed flags: +// +// - SYMCRYPT_FLAG_XMSSKEY_VERIFY_ROOT +// Can only be specified when importing a private key. Recomputes the +// public root value and compares it to the one that is imported from the +// key blob. 
+// +// Return values: +// +// - SYMCRYPT_NO_ERROR +// On successfully updating the key object from the provided key blob +// +// - SYMCRYPT_INVALID_ARGUMENT +// If cbInput does not match a public/private key size indicated by keyType parameter +// If an invalid flag is specified, or SYMCRYPT_FLAG_XMSSKEY_VERIFY_ROOT is +// specified when setting a public key +// +// - SYMCRYPT_INVALID_BLOB +// If the XMSS algorithm ID in the key blob does not match the algorithm ID +// used in creating the key object pointed to by pKey +// +// - SYMCRYPT_MEMORY_ALLOCATION_FAILURE +// If there is not sufficient memory for public root verification (only if +// SYMCRYPT_FLAG_XMSSKEY_VERIFY_ROOT is set in flags) +// +// - SYMCRYPT_HBS_PUBLIC_ROOT_MISMATCH +// If public root value in the key blob does not match the recomputed root value +// (only if key blob is for a private key and SYMCRYPT_FLAG_XMSSKEY_VERIFY_ROOT is +// specified) +// +// Remarks: +// +// - The key blob size cbInput must match the size returned by SymCryptXmssSizeofKeyBlobFromParams +// for the same keyType and XMSS parameters the key object is created with. +// - If the function fails, the key object will be in an invalid state. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmsskeyGetValue( + _In_ PCSYMCRYPT_XMSS_KEY pKey, + SYMCRYPT_XMSSKEY_TYPE keyType, + UINT32 flags, + _Out_writes_bytes_( cbOutput ) PBYTE pbOutput, + SIZE_T cbOutput ); +// +// Get public/private key value from an XMSS/XMSS^MT key object +// +// Key formats: +// +// PubKey: algId | Root | Seed +// PrvKey: algId | Root | Seed | Idx | SK_XMSS | SK_PRF +// +// algId and Idx are 32-bit and 64-bit integers respectively, stored in big-endian format. +// Other values are n-bytes where n is the output size (in bytes) of the hash +// algorithm (or the truncated size if the hash output is truncated). +// +// Public-key format is specified in RFC 8391, whereas private-key format is not. 
+// We define the private-key as an extension of the public-key with the private key +// material. +// +// Parameters: +// +// pKey. The key object to export the key material from +// +// keyType. Type of the key (public or private) to get. If the key object +// contains a public key, keyType must be SYMCRYPT_XMSSKEY_TYPE_PUBLIC. If +// the key object contains a private key, keyType can be one of +// SYMCRYPT_XMSSKEY_TYPE_PUBLIC or SYMCRYPT_XMSSKEY_TYPE_PRIVATE +// +// flags. No flags defined for this function +// +// (pbOutput, cbOutput). Buffer to store the exported key blob. cbOutput must match +// the size of the key to be exported, which can be queried by calling +// SymCryptXmssSizeofKeyBlobFromParams. +// +// Return values: +// +// - SYMCRYPT_NO_ERROR +// On successful exporting of the key +// +// - SYMCRYPT_INVALID_ARGUMENT +// If cbOutput does not match the exact size of the key blob for the specified +// keyType +// If the key object does not contain private key material when keyType +// equals SYMCRYPT_XMSSKEY_TYPE_PRIVATE +// If unsupported flags are specified in flags parameter +// + +VOID +SYMCRYPT_CALL +SymCryptXmsskeyFree( + _Inout_ PSYMCRYPT_XMSS_KEY pKey); +// +// Free an allocated XMSS/XMSS^MT key object +// + +SIZE_T +SYMCRYPT_CALL +SymCryptXmssSizeofSignatureFromParams( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams ); +// +// Return the size of the signature for given XMSS parameters +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssSign( + _Inout_ PSYMCRYPT_XMSS_KEY pKey, + _In_reads_bytes_( cbMessage ) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ); +// +// Sign a message using XMSS/XMSS^MT +// +// Parameters: +// +// pKey. Private XMSS/XMSS^MT key used in signing +// +// (pbMessage, cbMessage). Message to be signed +// +// flags. No flags defined for this function +// +// (pbSignature, cbSignature). 
Buffer to store the generated signature +// +// Requirements: +// +// pKey must contain the private key +// +// cbSignature must be equal to the generated signature size +// +// Return values: +// +// - SYMCRYPT_NO_ERROR on successful signature generation +// +// - SYMCRYPT_INVALID_ARGUMENT +// If flags parameter is invalid, +// or if the key object does not contain private key, +// or cbSignature is not of correct size +// +// - SYMCRYPT_HBS_NO_OTS_KEYS_LEFT +// If the key doesn't have any one-time-signatures left for signing +// +// Remarks: +// +// The input pbMessage can be of arbitrary length and its randomized hash will be the actual +// value that is going to be signed with a WOTSP signature. Applications wanting to pass the hash +// value of a message to be signed as opposed to the message itself must make sure to have +// domain separation between the space of messages and the hashes of the messages. +// +// The signature size can be queried with the SymCryptXmssSizeofSignatureFromParams function. +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssVerify( + _Inout_ PSYMCRYPT_XMSS_KEY pKey, + _In_reads_bytes_( cbMessage ) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature ); +// +// Verify an XMSS/XMSS^MT signature on a message +// +// Parameters: +// +// pKey. XMSS key used to verify the signature +// +// (pbMessage, cbMessage) Message for which the signature was created +// +// flags. No flags defined for this function +// +// (pbSignature, cbSignature) XMSS or XMSS^MT signature +// +// Return values: +// +// - SYMCRYPT_NO_ERROR +// If signature verification succeeds +// +// - SYMCRYPT_INVALID_ARGUMENT +// If flags is invalid or cbSignature is of incorrect size +// +// - SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE +// If the signature is not valid +// +// Requirements: +// +// cbSignature must be equal to the exact signature size associated with +// the XMSS parameters. 
+// +// Remarks: +// +// In XMSS, the message can be arbitrarily long and a randomized hash of the message +// will be computed first to be signed by the WOTSP internally. +// + +VOID +SYMCRYPT_CALL +SymCryptXmssSelftest(void); +// +// FIPS self-test for signature verification +// + +//======================================================================== +// Leighton-Micali Hash-Based Signatures (LMS) - LMS external struct definitions - implementing +// RFC8554/NIST Special Publication 800-208 +// +typedef enum _SYMCRYPT_LMS_ALGID +{ + // + // Algorithm IDs for Leighton-Micali Hash-Based Signatures (LMS) + // M is the output length of the hash function in bytes, and H is the tree height. + // The M parameter primarily affects the security and size of the signatures, while the H parameter + // impacts the number of possible signatures and the computational cost for signing and verification. + // Larger M increases security and signature size, but increases the computational cost + // Higher H means more signatures but higher computational cost for signing and verification + // + SYMCRYPT_LMS_SHA256_M32_H5 = 0x00000005, + SYMCRYPT_LMS_SHA256_M32_H10 = 0x00000006, + SYMCRYPT_LMS_SHA256_M32_H15 = 0x00000007, + SYMCRYPT_LMS_SHA256_M32_H20 = 0x00000008, + SYMCRYPT_LMS_SHA256_M32_H25 = 0x00000009, + SYMCRYPT_LMS_SHA256_M24_H5 = 0x0000000A, + SYMCRYPT_LMS_SHA256_M24_H10 = 0x0000000B, + SYMCRYPT_LMS_SHA256_M24_H15 = 0x0000000C, + SYMCRYPT_LMS_SHA256_M24_H20 = 0x0000000D, + SYMCRYPT_LMS_SHA256_M24_H25 = 0x0000000E, + SYMCRYPT_LMS_SHAKE_M32_H5 = 0x0000000F, + SYMCRYPT_LMS_SHAKE_M32_H10 = 0x00000010, + SYMCRYPT_LMS_SHAKE_M32_H15 = 0x00000011, + SYMCRYPT_LMS_SHAKE_M32_H20 = 0x00000012, + SYMCRYPT_LMS_SHAKE_M32_H25 = 0x00000013, + SYMCRYPT_LMS_SHAKE_M24_H5 = 0x00000014, + SYMCRYPT_LMS_SHAKE_M24_H10 = 0x00000015, + SYMCRYPT_LMS_SHAKE_M24_H15 = 0x00000016, + SYMCRYPT_LMS_SHAKE_M24_H20 = 0x00000017, + SYMCRYPT_LMS_SHAKE_M24_H25 = 0x00000018, +} SYMCRYPT_LMS_ALGID; + +typedef enum 
_SYMCRYPT_LMS_OTS_ALGID +{ + // Algorithm IDs for Leighton-Micali Hash-Based Signatures (LMS) One-Time-Signature (OTS) + // N parameter represents the number of bytes in the hash function output. It determines the size of the hash values used in + // the LMS OTS scheme. + // W parameter represents the width of the Winternitz parameter used in LMS OTS. A larger value of w results in shorter + // signatures but requires more computation during key generation, signature generation, and signature verification. + // + SYMCRYPT_LMS_OTS_SHA256_N32_W1 = 0x00000001, + SYMCRYPT_LMS_OTS_SHA256_N32_W2 = 0x00000002, + SYMCRYPT_LMS_OTS_SHA256_N32_W4 = 0x00000003, + SYMCRYPT_LMS_OTS_SHA256_N32_W8 = 0x00000004, + SYMCRYPT_LMS_OTS_SHA256_N24_W1 = 0x00000005, + SYMCRYPT_LMS_OTS_SHA256_N24_W2 = 0x00000006, + SYMCRYPT_LMS_OTS_SHA256_N24_W4 = 0x00000007, + SYMCRYPT_LMS_OTS_SHA256_N24_W8 = 0x00000008, + SYMCRYPT_LMS_OTS_SHAKE_N32_W1 = 0x00000009, + SYMCRYPT_LMS_OTS_SHAKE_N32_W2 = 0x0000000A, + SYMCRYPT_LMS_OTS_SHAKE_N32_W4 = 0x0000000B, + SYMCRYPT_LMS_OTS_SHAKE_N32_W8 = 0x0000000C, + SYMCRYPT_LMS_OTS_SHAKE_N24_W1 = 0x0000000D, + SYMCRYPT_LMS_OTS_SHAKE_N24_W2 = 0x0000000E, + SYMCRYPT_LMS_OTS_SHAKE_N24_W4 = 0x0000000F, + SYMCRYPT_LMS_OTS_SHAKE_N24_W8 = 0x00000010, +} SYMCRYPT_LMS_OTS_ALGID; + +// Verifies the public key root value when importing a private key +#define SYMCRYPT_FLAG_LMSKEY_VERIFY_ROOT (0x00000001) + +typedef enum _SYMCRYPT_LMSKEY_TYPE +{ + SYMCRYPT_LMSKEY_TYPE_NONE = 0, // Key object does not contain any key material + SYMCRYPT_LMSKEY_TYPE_PUBLIC = 1, // Key object contains only public key + SYMCRYPT_LMSKEY_TYPE_PRIVATE = 2, // Key object contains both public key and private key +} SYMCRYPT_LMSKEY_TYPE; +// The format of the private key blob is as follows: +// [ Public key parts || Private key parts ] +// [ 4 || 4 || 16 || m || 4 || m ] +// [ LmsAlgId || LmsOtsAlgId || I || RootNode || NextUnusedLeaf || Seed ] +// +// The format of the public key blob is as follows: +// [ 
4 || 4 || 16 || m ] +// [ LmsAlgId || LmsOtsAlgId || I || RootNode ] + +//===================================================== +// LMS operations + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsParamsFromAlgId( + SYMCRYPT_LMS_ALGID lmsAlgID, + SYMCRYPT_LMS_OTS_ALGID lmsOtsAlgID, + _Out_ PSYMCRYPT_LMS_PARAMS pParams); +// +// This function populates a SYMCRYPT_LMS_PARAMS structure with the predefined parameter sets for a given LMS +// algorithm identifier and LMS OTS algorithm identifier. The resulting structure can be used to create LMS key objects. +// The values defined by SYMCRYPT_LMS_OTS_ALGID and SYMCRYPT_LMS_ALGID are all of the NIST SP 800-208 parameter +// sets supported by SymCrypt. +// +// Parameters: +// lmsAlgID: The LMS algorithm identifier to use +// +// lmsOtsAlgID: The LMS OTS algorithm identifier to use +// +// pParams: A pointer to a SYMCRYPT_LMS_PARAMS structure that will be populated with the predefined parameter sets +// +// Return value: +// If the function succeeds, it returns SYMCRYPT_NO_ERROR +// If the function fails, it returns SYMCRYPT_INVALID_ARGUMENT +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsSetParams( + _Out_ PSYMCRYPT_LMS_PARAMS pParams, + UINT32 lmsAlgID, + UINT32 lmsOtsAlgID, + _In_ PCSYMCRYPT_HASH pLmsHashFunction, + UINT32 cbHashOutput, + UINT32 nTreeHeight, + UINT32 nWinternitzChainWidth); +// +// This function allows for the customization of non-standard parameter sets, which cannot be set using SymCryptLmsParamsFromAlgId. +// +// Parameters: +// pParams: A pointer to a SYMCRYPT_LMS_PARAMS structure to be initialized. +// +// lmsAlgID: LMS algorithm identifier, will be embedded in key and signature objects. +// +// lmsOtsAlgID: LMS OTS algorithm identifier, will be embedded in key and signature objects. +// +// pLmsHashFunction: A pointer to the hash function to be used for the LMS system. +// +// cbHashOutput: The number of bytes for each tree node, equal to the output length of the hash function. 
+// Must be less than or equal to 32. +// +// nTreeHeight: The height of the LMS tree. Must be < 32; there are (2^nTreeHeight) leaves in the tree. +// +// nWinternitzChainWidth: An integer that specifies the base2 logarithm of Winternitz chain lengths. +// Must be one of 1, 2, 4, or 8 +// +// Return value: +// If the function succeeds, it fills the SYMCRYPT_LMS_PARAMS structure with the user-defined values and returns SYMCRYPT_NO_ERROR. +// Otherwise, it sets the values of the SYMCRYPT_LMS_PARAMS structure to 0 and returns SYMCRYPT_INVALID_ARGUMENT. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsSizeofKeyBlobFromParams( + _In_ PCSYMCRYPT_LMS_PARAMS pParams, + SYMCRYPT_LMSKEY_TYPE keyType, + _Out_ SIZE_T* pcbKey); +// +// Returns the size of an LMS key blob based on the provided LMS parameters and keyType. +// +// Parameters: +// pParams: A pointer to a SYMCRYPT_LMS_PARAMS structure that specifies the parameters of the LMS key. +// +// keyType: Specifies the type of blob for which to retrieve the size. +// Must be one of SYMCRYPT_LMSKEY_TYPE_PUBLIC or SYMCRYPT_LMSKEY_TYPE_PRIVATE. +// +// pcbKey: Pointer to the variable to store the size of a public/private +// key blob associated with the LMS parameters. +// +// Return value: +// If the function succeeds, it returns SYMCRYPT_NO_ERROR. In case keyType is not recognized +// it returns SYMCRYPT_INVALID_ARGUMENT. +// + +PSYMCRYPT_LMS_KEY +SYMCRYPT_CALL +SymCryptLmskeyAllocate( + _In_ PCSYMCRYPT_LMS_PARAMS pParams, + UINT32 flags); +// +// This function allocates a new SYMCRYPT_LMS_KEY object, which represents a key for the Leighton-Micali Signature (LMS) +// scheme, based on the given PCSYMCRYPT_LMS_PARAMS. The function allocates memory for the key object, and returns a pointer to it. +// The caller is responsible for freeing the memory when the key is no longer needed, using the SymCryptLmskeyFree function. 
+// +// Parameters: +// pParams: A pointer to a constant SYMCRYPT_LMS_PARAMS structure that describes +// the LMS parameters to be used for the key. +// The structure must be non-null, and must be initialized by one of the initialization functions: +// SymCryptLmsParamsFromAlgId or SymCryptLmsSetParams. +// +// flags: Currently not used. Must be set to 0. +// +// Return value: +// If the function succeeds, it returns a pointer to the newly created SYMCRYPT_LMS_KEY object. +// Otherwise, it returns NULL, indicating an error that should be handled by the caller. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmskeyGenerate( + _Inout_ PSYMCRYPT_LMS_KEY pKey, + UINT32 flags); +// +// This function generates an LMS public/private key pair in the pKey object. +// +// Parameters: +// pKey: A pointer to a SYMCRYPT_LMS_KEY structure that represents the LMS key object to be initialized. The structure +// must be valid and non-null, and must have been previously created using the SymCryptLmskeyAllocate +// function. If the key object already contains key values, they will be overwritten by the generated values. +// +// flags: Currently not used. Must be set to 0. +// +// Return value: +// If the function succeeds, it returns SYMCRYPT_NO_ERROR. Otherwise, it returns an error code that describes the nature of the +// failure, such as SYMCRYPT_INVALID_ARGUMENT. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmskeySetValue( + _In_reads_bytes_(cbInput) PCBYTE pbInput, + SIZE_T cbInput, + SYMCRYPT_LMSKEY_TYPE keyType, + UINT32 flags, + _Inout_ PSYMCRYPT_LMS_KEY pKey); +// +// This function imports an LMS key from a buffer, setting the key object with the provided data. +// +// Parameters: +// pbInput: A pointer to a byte buffer containing the key data to be imported into the LMS key object +// The format of the input buffer is specified by the SYMCRYPT_LMSKEY_TYPE enumeration. 
+// +// cbInput: The size, in bytes, of the key data buffer pointed to by pbInput +// +// keyType: Indicates whether (pbInput, cbInput) contains a public or a private key. +// Must be one of SYMCRYPT_LMSKEY_TYPE_PUBLIC, or SYMCRYPT_LMSKEY_TYPE_PRIVATE. +// +// flags: See allowed flags below. +// +// pKey: A pointer to a SYMCRYPT_LMS_KEY structure that will receive the imported key from the buffer pbInput +// +// Allowed flags: +// SYMCRYPT_FLAG_LMSKEY_VERIFY_ROOT: Can only be specified when importing a private key. Recomputes the +// public root value and compares it to the one that is imported from the key blob. +// +// Return value: +// If the function succeeds, it returns SYMCRYPT_NO_ERROR and the SYMCRYPT_LMS_KEY structure is set with the imported key data. +// If the function fails, it returns an error code that describes the nature of the failure, such as SYMCRYPT_INVALID_ARGUMENT. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmskeyGetValue( + _In_ PCSYMCRYPT_LMS_KEY pKey, + SYMCRYPT_LMSKEY_TYPE keyType, + UINT32 flags, + _Out_writes_bytes_(cbOutput) PBYTE pbOutput, + SIZE_T cbOutput); +// +// This function retrieves the public or private key value from an LMS key object, depending on the keyType parameter +// +// Parameters: +// pKey: A pointer to a SYMCRYPT_LMS_KEY structure that represents the LMS key object to retrieve the key value from. +// The structure must be valid and non-null, and must contain the key values to retrieve +// +// keyType: Type of the key (public or private) to get. If the key object only +// contains a public key, keyType must be SYMCRYPT_LMSKEY_TYPE_PUBLIC. If +// the key object contains a private key, keyType can be one of +// SYMCRYPT_LMSKEY_TYPE_PUBLIC or SYMCRYPT_LMSKEY_TYPE_PRIVATE +// +// flags: Currently not used. Must be set to 0. +// +// pbOutput: A buffer to hold the key value. The buffer must be large enough to hold the key value. +// The format of the output buffer is specified by the SYMCRYPT_LMSKEY_TYPE enumeration. 
+// +// cbOutput: The size of the pbOutput buffer in bytes +// +// Return value: +// If the function succeeds, it returns SYMCRYPT_NO_ERROR. Otherwise, it returns SYMCRYPT_INVALID_ARGUMENT. +// + +VOID +SYMCRYPT_CALL +SymCryptLmskeyFree( + _Inout_ PSYMCRYPT_LMS_KEY pKey); +// +// This function frees the memory that was allocated for the given LMS key object, which was previously created using the +// SymCryptLmskeyAllocate function. The function wipes and deallocates the memory. +// +// Parameters: +// pKey: A pointer to a SYMCRYPT_LMS_KEY structure that represents the LMS key object to be freed. The structure +// must be valid and non-null, and must have been previously created using the SymCryptLmskeyAllocate function. +// +// Return value: +// The function does not return a value. +// + +SIZE_T +SYMCRYPT_CALL +SymCryptLmsSizeofSignatureFromParams( + _In_ PCSYMCRYPT_LMS_PARAMS pParams); +// +// This function returns the size, in bytes, of the signature that will be generated by the LMS signature scheme, based on the +// specified LMS parameters. +// +// Parameters: +// pParams: A pointer to a SYMCRYPT_LMS_PARAMS structure that represents the parameters associated with the LMS key to +// use for computing the signature size. The structure must be valid and non-null. +// +// Return value: +// Signature size in bytes. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsSign( + _Inout_ PSYMCRYPT_LMS_KEY pKey, + _In_reads_bytes_(cbMessage) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _Out_writes_bytes_(cbSignature) PBYTE pbSignature, + SIZE_T cbSignature); +// +// This function generates an LMS signature for the given message, using the private key in the given LMS key object. +// The function fills the buffer pointed to by pbSignature with the LMS signature. It uses the LMS parameters +// and key values that were specified when the key object was created to generate the signature. 
+// Stateful hash-based signatures are not approved by FIPS for key generation and signature generation in software +// modules. Special care must be taken to ensure that the same private key state is not used more than once to +// sign messages. This can be done, for instance, by releasing a signature only after verifying that the private +// key has been updated and serialized to a physical storage. +// +// Parameters: +// pKey: A pointer to a SYMCRYPT_LMS_KEY structure that represents the LMS key object to be used for signing the message. +// The structure must be valid and non-null, and must contain the private key values for the LMS scheme. The private key +// must have been initialized previously using the SymCryptLmskeyGenerate or SymCryptLmskeySetValue function. +// +// pbMessage: A pointer to a buffer that contains the message to be signed. +// +// cbMessage: The length in bytes of the message to be signed. +// +// flags: Currently not used. Must be set to 0. +// +// pbSignature: A pointer to the buffer that receives the computed signature. It must be large enough to hold the +// generated signature. The required size can be retrieved using: SymCryptLmsSizeofSignatureFromParams. +// +// cbSignature: The size of the signature buffer pbSignature. If the passed size is different than the +// required signature size an error will be returned. +// +// Return value: +// SYMCRYPT_NO_ERROR - If the function succeeds +// +// SYMCRYPT_HBS_NO_OTS_KEYS_LEFT - If the key has run out of available OTS keys +// +// SYMCRYPT_INVALID_ARGUMENT - If one of the input parameters is invalid +// +// Remarks: +// The LMS signing process inherits its signature from the LMS OTS, which means that it will always compute a digest of the +// given message before signing, even if a hash value is provided as the message. 
+// Developers should always be consistent with the input to the LMS sign and verify functions and ensure that the input message +// is in the correct format before passing it to these functions +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsVerify( + _In_ PCSYMCRYPT_LMS_KEY pKey, + _In_reads_bytes_(cbMessage) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _In_reads_bytes_(cbSignature) PCBYTE pbSignature, + SIZE_T cbSignature); +// +// This function verifies the given LMS signature (pbSignature) for the given message (pbMessage), using the public key +// in the given LMS key object. The function returns SYMCRYPT_NO_ERROR if the signature is valid, and an error code otherwise. +// +// Parameters: +// pKey: A pointer to a SYMCRYPT_LMS_KEY structure that represents the LMS key object to be used for verifying +// the signature. The structure must be valid and non-null, and must contain the public or private key values for the LMS scheme. +// The public key must have been generated previously using the SymCryptLmskeyGenerate or SymCryptLmskeySetValue functions, and must match the +// private key that was used to generate the signature. +// +// pbMessage: A pointer to a buffer that contains the message that was signed. The buffer must be valid and non-null, and +// must contain at least cbMessage bytes of data. +// +// cbMessage: The length in bytes of the message that was signed. The length should be set to the actual size of the message. +// If the message is larger than the maximum size allowed by the LMS parameters, the function will return an error. +// +// flags: Currently not used. Must be set to 0. +// +// pbSignature: A pointer to a buffer that contains the signature that was generated for the message. The buffer must +// be valid and non-null, and must contain at least cbSignature bytes. +// +// cbSignature: The length in bytes of the signature buffer that contains the signature. 
The length must be +// equal to the exact signature size associated with the given LMS parameters and key values. +// +// Return value: +// SYMCRYPT_NO_ERROR - If the function succeeds +// +// SYMCRYPT_INVALID_ARGUMENT - If the signature structure is not correct or if there is a mismatch between the +// input parameters. +// +// SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE - If the signature verification fails +// + +VOID +SYMCRYPT_CALL +SymCryptLmsSelftest(void); + + +// MLKEMKEY objects' API +// + +// MLKEM key formats +// ================== +// The below formats apply **only to external formats**: When somebody is importing or exporting +// a key. The internal format of the keys is not visible to the caller. +typedef enum _SYMCRYPT_MLKEMKEY_FORMAT { + SYMCRYPT_MLKEMKEY_FORMAT_NULL = 0, + SYMCRYPT_MLKEMKEY_FORMAT_PRIVATE_SEED = 1, + // 64-byte concatenation of d || z from FIPS 203. Smallest representation of a full + // ML-KEM key. + // On its own it is ambiguous which ML-KEM parameter set this represents; callers wanting to + // store this format must track the parameter set alongside the key. + SYMCRYPT_MLKEMKEY_FORMAT_DECAPSULATION_KEY = 2, + // Standard byte encoding of an ML-KEM Decapsulation key, per FIPS 203. + // Size is 1632, 2400, or 3168 bytes for ML-KEM 512, 768, and 1024 respectively. + SYMCRYPT_MLKEMKEY_FORMAT_ENCAPSULATION_KEY = 3, + // Standard byte encoding of an ML-KEM Encapsulation key, per FIPS 203. + // Size is 800, 1184, or 1568 bytes for ML-KEM 512, 768, and 1024 respectively. 
+} SYMCRYPT_MLKEMKEY_FORMAT; + + +typedef enum _SYMCRYPT_MLKEM_PARAMS { + SYMCRYPT_MLKEM_PARAMS_NULL = 0, + SYMCRYPT_MLKEM_PARAMS_MLKEM512 = 1, + SYMCRYPT_MLKEM_PARAMS_MLKEM768 = 2, + SYMCRYPT_MLKEM_PARAMS_MLKEM1024 = 3, +} SYMCRYPT_MLKEM_PARAMS; +// +// Currently supported ML-KEM parameter sets are represented externally only by the enum +// + +PSYMCRYPT_MLKEMKEY +SYMCRYPT_CALL +SymCryptMlKemkeyAllocate( + SYMCRYPT_MLKEM_PARAMS params ); +// +// Allocate and create a new MLKEMKEY object sized according to the specified parameters. +// +// This call does not initialize the key. It should be +// followed by a call to SymCryptMlKemkeyGenerate or +// SymCryptMlKemkeySetValue. +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemkeyFree( + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey ); + + +// d and z are each 32 bytes +#define SYMCRYPT_MLKEM_PRIVATE_SEED_SIZE (2*32) + +#define SYMCRYPT_MLKEM_ENCAPSULATION_KEY_SIZE_MLKEM512 (800) +#define SYMCRYPT_MLKEM_ENCAPSULATION_KEY_SIZE_MLKEM768 (1184) +#define SYMCRYPT_MLKEM_ENCAPSULATION_KEY_SIZE_MLKEM1024 (1568) + +#define SYMCRYPT_MLKEM_DECAPSULATION_KEY_SIZE_MLKEM512 (1632) +#define SYMCRYPT_MLKEM_DECAPSULATION_KEY_SIZE_MLKEM768 (2400) +#define SYMCRYPT_MLKEM_DECAPSULATION_KEY_SIZE_MLKEM1024 (3168) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemSizeofKeyFormatFromParams( + SYMCRYPT_MLKEM_PARAMS params, + SYMCRYPT_MLKEMKEY_FORMAT mlKemkeyformat, + _Out_ SIZE_T* pcbKeyFormat ); +// +// Gives the size in bytes of the blob of the given format for the given ML-KEM +// parameters via pcbKeyFormat output. +// Returns SYMCRYPT_INCOMPATIBLE_FORMAT if mlKemkeyFormat is an unsupported value, +// or SYMCRYPT_INVALID_ARGUMENT if other parameters are invalid. 
+// + +#define SYMCRYPT_MLKEM_CIPHERTEXT_SIZE_MLKEM512 (768) +#define SYMCRYPT_MLKEM_CIPHERTEXT_SIZE_MLKEM768 (1088) +#define SYMCRYPT_MLKEM_CIPHERTEXT_SIZE_MLKEM1024 (1568) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemSizeofCiphertextFromParams( + SYMCRYPT_MLKEM_PARAMS params, + _Out_ SIZE_T* pcbCiphertext ); +// +// Gives the size in bytes of the ciphertext for the given ML-KEM parameters. +// Returns SYMCRYPT_INVALID_ARGUMENT if parameters are invalid. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemkeyGenerate( + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey, + UINT32 flags ); +// +// Generate a new random ML-KEM key using the information from the +// parameters passed to SymCryptMlKemkeyAllocate. +// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// Described in more detail in the "Flags for asymmetric key generation and import" section above +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemkeySetValue( + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_MLKEMKEY_FORMAT mlKemkeyFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey ); +// +// Import key material to an ML-KEM key object. The arguments are the following: +// - (pbSrc, cbSrc): a buffer containing a representation of an ML-KEM key, +// in format specified by mlKemkeyFormat. 
+// - mlKemkeyFormat format of the input +// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// - SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION +// Opt-out of performing almost all validation - must be specified with SYMCRYPT_FLAG_KEY_NO_FIPS +// +// Remarks: +// - cbSrc must be equal to the cbKeyFormat returned from +// SymCryptMlKemSizeofKeyFormatFromParams(params, mlKemkeyFormat, &cbKeyFormat), though typically this +// value can be known statically (see definition of SYMCRYPT_MLKEMKEY_FORMAT) +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemkeyGetValue( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_MLKEMKEY_FORMAT mlKemkeyFormat, + UINT32 flags ); +// +// Export key material from an ML-KEM key object. The arguments are the following: +// - (pbDst, cbDst): a buffer into which a representation of an ML-KEM key is +// written, in the format specified by mlKemkeyFormat. +// - mlKemkeyFormat format of the output +// +// Allowed flags: +// - None. +// +// Remarks: +// - If the key object does not have the information required to export to the format +// specified by mlKemkeyFormat this function will return SYMCRYPT_INCOMPATIBLE_FORMAT. +// - cbDst must be equal to the cbKeyFormat returned from +// SymCryptMlKemSizeofKeyFormatFromParams(params, mlKemkeyFormat, &cbKeyFormat), though typically this +// value can be known statically (see definition of SYMCRYPT_MLKEMKEY_FORMAT) +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemEncapsulate( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _Out_writes_bytes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret, + _Out_writes_bytes_( cbCiphertext ) PBYTE pbCiphertext, + SIZE_T cbCiphertext ); +// +// Performs the Encapsulate operation of ML-KEM. +// This uses the public information of an ML-KEM keypair to generate an agreed secret +// and a ciphertext. 
Only a peer with the private information of an ML-KEM keypair can +// decapsulate the ciphertext to compute the agreed secret. +// +// The arguments are the following: +// - pkMlKemkey: a key which contains public information required for encapsulation. +// - (pbAgreedSecret, cbAgreedSecret): a buffer into which the generated secret is written. +// Currently cbAgreedSecret must be 32 for all parameterizations of ML-KEM. +// - (pbCiphertext, cbCiphertext): a buffer into which the encapsulated secret is written. +// cbCiphertext must equal cbCiphertext given by SymCryptMlKemSizeofCiphertextFromParams, +// though typically this value can be known statically (see definition of +// SYMCRYPT_MLKEM_CIPHERTEXT_SIZE_*). +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemDecapsulate( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _In_reads_bytes_( cbCiphertext ) PCBYTE pbCiphertext, + SIZE_T cbCiphertext, + _Out_writes_bytes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret ); +// +// Performs the Decapsulate operation of ML-KEM. +// This uses the private information of an ML-KEM keypair to generate an agreed +// secret from a ciphertext. +// +// The arguments are the following: +// - pkMlKemkey: a key which contains private information required for decapsulation. +// - (pbCiphertext, cbCiphertext): a buffer containing an encapsulated secret. +// cbCiphertext must equal cbCiphertext given by SymCryptMlKemSizeofCiphertextFromParams, +// though typically this value can be known statically (see definition of +// SYMCRYPT_MLKEM_CIPHERTEXT_SIZE_*). +// - (pbAgreedSecret, cbAgreedSecret): a buffer into which the generated secret is written. +// Currently cbAgreedSecret must be 32 for all parameterizations of ML-KEM. +// +// Note: Given an invalid, but correctly-sized, ciphertext, the ML-KEM Decapsulation operation +// will "implicitly reject" the ciphertext, by returning success in equal time to a valid +// decapsulation operation, with pseudo-random agreed secret output. 
This forces higher +// level protocols to fail later when symmetric keys of peers do not match. +// So decapsulate will only ever return an error if there are programming errors (e.g. incorrect size), +// or something fundamentally goes wrong with the environment (e.g. internal memory allocation fails). +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemSelftest(void); +// +// FIPS self-test for ML-KEM. If the self-test fails, SymCryptFatal will be called to fastfail. +// The self-test will automatically be performed before first operational use of ML-KEM if using +// keys with FIPS validation, so most callers should never use this function. +// + +// +// COMPOSITE MLKEMKEY objects' API +// +// The below formats apply **only to external formats**: When somebody is importing or exporting +// a key. The internal format of the keys is not visible to the caller. +typedef enum _SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT { + SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT_NULL = 0, + SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT_IRTF_PRIVATE_SEED = 1, + // 32-byte seed for deriving Composite ML-KEM key, per irtf-cfrg-hybrid-kems CG framework + SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT_LAMPS_PRIVATE_KEY = 2, + // Standard byte encoding of a Composite ML-KEM private key, per LAMPS composite ML-KEM draft 12. + // Concatenation of ML-KEM private seed and private key of the traditional component: + // mlkemSeed || tradSK + // Size in bytes are MLKEM768_P256: 115, MLKEM768_X25519: 96, MLKEM1024_P384: 128 + SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT_PUBLIC_KEY = 3, + // Standard byte encoding of a Composite ML-KEM public key, per irtf-cfrg-hybrid-kems CG framework + // and LAMPS composite ML-KEM draft 12. 
+ // Concatenation of ML-KEM encapsulation key and public key of the traditional component: + // mlkemPK || tradPK + // Size in bytes are MLKEM768_P256: 1249, MLKEM768_X25519: 1216, MLKEM1024_P384: 1665 +} SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT; + + +typedef enum _SYMCRYPT_COMPOSITE_MLKEM_PARAMS { + SYMCRYPT_COMPOSITE_MLKEM_PARAMS_NULL = 0, + SYMCRYPT_COMPOSITE_MLKEM_PARAMS_MLKEM768_P256 = 1, + SYMCRYPT_COMPOSITE_MLKEM_PARAMS_MLKEM768_X25519 = 2, + SYMCRYPT_COMPOSITE_MLKEM_PARAMS_MLKEM1024_P384 = 3, +} SYMCRYPT_COMPOSITE_MLKEM_PARAMS; +// +// Currently supported Composite ML-KEM parameter sets are represented externally only by the enum +// + +PSYMCRYPT_COMPOSITE_MLKEMKEY +SYMCRYPT_CALL +SymCryptCompositeMlKemkeyAllocate( + SYMCRYPT_COMPOSITE_MLKEM_PARAMS params ); +// +// Allocate and create a new COMPOSITE_MLKEMKEY object sized according to the specified parameters. +// +// This call does not initialize the key. It should be +// followed by a call to SymCryptCompositeMlKemkeyGenerate or +// SymCryptCompositeMlKemkeySetValue. 
+// + +VOID +SYMCRYPT_CALL +SymCryptCompositeMlKemkeyFree( + _Inout_ PSYMCRYPT_COMPOSITE_MLKEMKEY pkCompositeMlKemkey ); + + +#define SYMCRYPT_COMPOSITE_MLKEM_IRTF_PRIVATE_SEED_SIZE (32) + +#define SYMCRYPT_COMPOSITE_MLKEM_LAMPS_PRIVATE_KEY_SIZE_MLKEM768_P256 (115) +#define SYMCRYPT_COMPOSITE_MLKEM_LAMPS_PRIVATE_KEY_SIZE_MLKEM768_X25519 (96) +#define SYMCRYPT_COMPOSITE_MLKEM_LAMPS_PRIVATE_KEY_SIZE_MLKEM1024_P384 (128) + +#define SYMCRYPT_COMPOSITE_MLKEM_PUBLIC_KEY_SIZE_MLKEM768_P256 (1249) +#define SYMCRYPT_COMPOSITE_MLKEM_PUBLIC_KEY_SIZE_MLKEM768_X25519 (1216) +#define SYMCRYPT_COMPOSITE_MLKEM_PUBLIC_KEY_SIZE_MLKEM1024_P384 (1665) + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCompositeMlKemSizeofKeyFormatFromParams( + SYMCRYPT_COMPOSITE_MLKEM_PARAMS params, + SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT compositeMlKemkeyformat, + _Out_ SIZE_T* pcbKeyFormat ); +// +// Gives the size in bytes of the blob of the given format for the given Composite ML-KEM +// parameters via pcbKeyFormat output. +// Returns SYMCRYPT_INCOMPATIBLE_FORMAT if compositeMlKemkeyformat is an unsupported value, +// or SYMCRYPT_INVALID_ARGUMENT if other parameters are invalid. +// + +#define SYMCRYPT_COMPOSITE_MLKEM_CIPHERTEXT_SIZE_MLKEM768_P256 (1153) +#define SYMCRYPT_COMPOSITE_MLKEM_CIPHERTEXT_SIZE_MLKEM768_X25519 (1120) +#define SYMCRYPT_COMPOSITE_MLKEM_CIPHERTEXT_SIZE_MLKEM1024_P384 (1665) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCompositeMlKemSizeofCiphertextFromParams( + SYMCRYPT_COMPOSITE_MLKEM_PARAMS params, + _Out_ SIZE_T* pcbCiphertext ); +// +// Gives the size in bytes of the ciphertext for the given Composite ML-KEM parameters. +// Returns SYMCRYPT_INVALID_ARGUMENT if parameters are invalid. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCompositeMlKemkeyGenerate( + _Inout_ PSYMCRYPT_COMPOSITE_MLKEMKEY pkCompositeMlKemkey, + UINT32 flags ); +// +// Generate a new random Composite ML-KEM key using the information from the +// parameters passed to SymCryptCompositeMlKemkeyAllocate. 
+// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// Described in more detail in the "Flags for asymmetric key generation and import" section above +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCompositeMlKemkeySetValue( + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT compositeMlKemkeyFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_COMPOSITE_MLKEMKEY pkCompositeMlKemkey ); +// +// Import key material to a Composite ML-KEM key object. The arguments are the following: +// - (pbSrc, cbSrc): a buffer containing a representation of a Composite ML-KEM key, +// in the format specified by compositeMlKemkeyFormat. +// - compositeMlKemkeyFormat: format of the input +// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// - SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION +// Opt-out of performing almost all validation - must be specified with SYMCRYPT_FLAG_KEY_NO_FIPS +// +// Remarks: +// - cbSrc must be equal to the cbKeyFormat returned from +// SymCryptCompositeMlKemSizeofKeyFormatFromParams(params, compositeMlKemkeyFormat, &cbKeyFormat), though +// typically this value can be known statically (see definition of SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT) +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCompositeMlKemkeyGetValue( + _In_ PCSYMCRYPT_COMPOSITE_MLKEMKEY pkCompositeMlKemkey, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT compositeMlKemkeyFormat, + UINT32 flags ); +// +// Export key material from a Composite ML-KEM key object. The arguments are the following: +// - (pbDst, cbDst): a buffer into which a representation of a Composite ML-KEM key is +// written, in the format specified by compositeMlKemkeyFormat. +// - compositeMlKemkeyFormat: format of the output +// +// Allowed flags: +// - None.
+// +// Remarks: +// - If the key object does not have the information required to export to the format +// specified by compositeMlKemkeyFormat this function will return SYMCRYPT_INCOMPATIBLE_FORMAT. +// - cbDst must be equal to the cbKeyFormat returned from +// SymCryptCompositeMlKemSizeofKeyFormatFromParams(params, compositeMlKemkeyFormat, &cbKeyFormat), though typically this +// value can be known statically (see definition of SYMCRYPT_COMPOSITE_MLKEMKEY_FORMAT) +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCompositeMlKemEncapsulate( + _In_ PCSYMCRYPT_COMPOSITE_MLKEMKEY pkCompositeMlKemkey, + _Out_writes_bytes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret, + _Out_writes_bytes_( cbCiphertext ) PBYTE pbCiphertext, + SIZE_T cbCiphertext ); +// +// Performs the Encapsulate operation of Composite ML-KEM. +// This uses the public information of a Composite ML-KEM keypair to generate an agreed secret +// and a ciphertext. Only a peer with the private information of a Composite ML-KEM keypair can +// decapsulate the ciphertext to compute the agreed secret. +// +// The arguments are the following: +// - pkCompositeMlKemkey: a key which contains public information required for encapsulation. +// - (pbAgreedSecret, cbAgreedSecret): a buffer into which the generated secret is written. +// Currently cbAgreedSecret must be 32 for all parameterizations of Composite ML-KEM. +// - (pbCiphertext, cbCiphertext): a buffer into which the encapsulated secret is written. +// cbCiphertext must equal cbCiphertext given by SymCryptCompositeMlKemSizeofCiphertextFromParams, +// though typically this value can be known statically (see definition of +// SYMCRYPT_COMPOSITE_MLKEM_CIPHERTEXT_SIZE_*). 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCompositeMlKemDecapsulate( + _In_ PCSYMCRYPT_COMPOSITE_MLKEMKEY pkCompositeMlKemkey, + _In_reads_bytes_( cbCiphertext ) PCBYTE pbCiphertext, + SIZE_T cbCiphertext, + _Out_writes_bytes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret ); +// +// Performs the Decapsulate operation of Composite ML-KEM. +// This uses the private information of a Composite ML-KEM keypair to generate an agreed +// secret from a ciphertext. +// +// The arguments are the following: +// - pkCompositeMlKemkey: a key which contains private information required for decapsulation. +// - (pbCiphertext, cbCiphertext): a buffer containing an encapsulated secret. +// cbCiphertext must equal cbCiphertext given by SymCryptCompositeMlKemSizeofCiphertextFromParams, +// though typically this value can be known statically (see definition of +// SYMCRYPT_COMPOSITE_MLKEM_CIPHERTEXT_SIZE_*). +// - (pbAgreedSecret, cbAgreedSecret): a buffer into which the generated secret is written. +// Currently cbAgreedSecret must be 32 for all parameterizations of Composite ML-KEM. +// +// Note: Given an invalid, but correctly-sized, ciphertext, the Composite ML-KEM Decapsulation operation +// will "implicitly reject" the ciphertext, by returning success in equal time to a valid +// decapsulation operation, with pseudo-random agreed secret output. This forces higher +// level protocols to fail later when symmetric keys of peers do not match. +// So decapsulate will only ever return an error if there are programming errors (e.g. incorrect size), +// or something fundamentally goes wrong with the environment (e.g. internal memory allocation fails). 
+// + +//////////////////////////////////////////////////////////// +// Module-Lattice-Based Digital Signature Algorithm (ML-DSA) +//////////////////////////////////////////////////////////// + +// Maximum length of the context string used in signing and verification +#define SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH (255) + +// ML-DSA key formats +// ================== +// The below formats apply **only to external formats**: When somebody is importing or exporting +// a key. The internal format of the keys is not visible to the caller. +typedef enum _SYMCRYPT_MLDSAKEY_FORMAT { + SYMCRYPT_MLDSAKEY_FORMAT_NULL = 0, + SYMCRYPT_MLDSAKEY_FORMAT_PRIVATE_SEED = 1, + // 32-byte private root seed xi from which all other parameters can be derived. + // On its own it is ambiguous which ML-DSA parameter set this represents; callers wanting to + // store this format must track the parameter set alongside the key. + SYMCRYPT_MLDSAKEY_FORMAT_PRIVATE_KEY = 2, + // Standard byte encoding of an ML-DSA private key, per FIPS 204. + // Size is 2560, 4032, or 4896 bytes for ML-DSA 44, 65, and 87 respectively. + SYMCRYPT_MLDSAKEY_FORMAT_PUBLIC_KEY = 3, + // Standard byte encoding of an ML-DSA public key, per FIPS 204. + // Size is 1312, 1952, or 2592 bytes for ML-DSA 44, 65, and 87 respectively. 
+} SYMCRYPT_MLDSAKEY_FORMAT; + +typedef enum _SYMCRYPT_MLDSA_PARAMS { + SYMCRYPT_MLDSA_PARAMS_NULL = 0, + SYMCRYPT_MLDSA_PARAMS_MLDSA44 = 1, + SYMCRYPT_MLDSA_PARAMS_MLDSA65 = 2, + SYMCRYPT_MLDSA_PARAMS_MLDSA87 = 3, +} SYMCRYPT_MLDSA_PARAMS; +// Currently supported ML-DSA parameter sets are represented externally only by the enum + +typedef enum _SYMCRYPT_PQDSA_HASH_ID { + SYMCRYPT_PQDSA_HASH_ID_NULL = 0, + SYMCRYPT_PQDSA_HASH_ID_SHA256 = 1, + SYMCRYPT_PQDSA_HASH_ID_SHA384 = 2, + SYMCRYPT_PQDSA_HASH_ID_SHA512 = 3, + SYMCRYPT_PQDSA_HASH_ID_SHA512_256 = 4, + SYMCRYPT_PQDSA_HASH_ID_SHA3_256 = 5, + SYMCRYPT_PQDSA_HASH_ID_SHA3_384 = 6, + SYMCRYPT_PQDSA_HASH_ID_SHA3_512 = 7, + SYMCRYPT_PQDSA_HASH_ID_SHAKE128 = 8, + SYMCRYPT_PQDSA_HASH_ID_SHAKE256 = 9, +} SYMCRYPT_PQDSA_HASH_ID; +// Supported hash algorithms for use with Hash-ML-DSA + +//======================================================================== +// MLDSAKEY objects' API +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSizeofKeyFormatFromParams( + SYMCRYPT_MLDSA_PARAMS params, + SYMCRYPT_MLDSAKEY_FORMAT mlDsakeyFormat, + _Out_ SIZE_T* pcbKeyFormat ); +// +// Gives the size in bytes of the blob of the given format for the given ML-DSA +// parameters via the pcbKeyFormat output. +// +// Return values: +// - SYMCRYPT_NO_ERROR on success. +// - SYMCRYPT_INCOMPATIBLE_FORMAT if mlDsakeyFormat is an unsupported value. +// - SYMCRYPT_INVALID_ARGUMENT if other parameters are invalid. +// + +#define SYMCRYPT_MLDSA_SIGNATURE_SIZE_MLDSA44 (2420) +#define SYMCRYPT_MLDSA_SIGNATURE_SIZE_MLDSA65 (3309) +#define SYMCRYPT_MLDSA_SIGNATURE_SIZE_MLDSA87 (4627) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSizeofSignatureFromParams( + SYMCRYPT_MLDSA_PARAMS params, + _Out_ SIZE_T* pcbSignature ); +// +// Gives the size in bytes of the signature for the given ML-DSA parameters. +// +// Return values: +// - SYMCRYPT_NO_ERROR on success. +// - SYMCRYPT_INVALID_ARGUMENT if parameters are invalid.
+// + +_Success_( return != NULL ) +PSYMCRYPT_MLDSAKEY +SYMCRYPT_CALL +SymCryptMlDsakeyAllocate( + SYMCRYPT_MLDSA_PARAMS params ); +// +// Allocate a new ML-DSA key object sized according to the parameters. +// +// This call does not generate key material. It should be followed by a call to +// SymCryptMlDsakeyGenerate or SymCryptMlDsakeySetValue. +// +// May return NULL if memory allocation fails. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsakeyFree( + _Post_invalid_ PSYMCRYPT_MLDSAKEY pkMlDsakey ); +// +// Free an ML-DSA key object that was allocated with SymCryptMlDsakeyAllocate. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsakeyGenerate( + _Inout_ PSYMCRYPT_MLDSAKEY pkMlDsakey, + UINT32 flags ); +// +// Generate a new random ML-DSA key using the information from the +// parameters passed to SymCryptMlDsakeyAllocate. +// +// Parameters: +// - pkMlDsakey: a pointer to an ML-DSA key object allocated with SymCryptMlDsakeyAllocate +// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// Return values: +// - SYMCRYPT_NO_ERROR on success. +// - SYMCRYPT_MEMORY_ALLOCATION_FAILURE if memory allocation fails. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsakeySetValue( + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_MLDSAKEY_FORMAT mlDsakeyFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_MLDSAKEY pkMlDsakey ); +// +// Import key material to an ML-DSA key object from a byte blob. +// +// Parameters: +// - (pbSrc, cbSrc): a buffer containing a representation of an ML-DSA key, in the format specified +// by mlDsakeyFormat. +// - mlDsakeyFormat: format of the input +// - pkMlDsakey: a pointer to an ML-DSA key object allocated with SymCryptMlDsakeyAllocate.
+// +// Allowed flags: +// +// - SYMCRYPT_FLAG_KEY_NO_FIPS +// Opt-out of performing validation required for FIPS +// +// Remarks: +// - cbSrc must be equal to the cbKeyFormat returned from +// SymCryptMlDsaSizeofKeyFormatFromParams(params, mlDsakeyFormat, &cbKeyFormat), though typically this +// value can be known statically (see definition of SYMCRYPT_MLDSAKEY_FORMAT) +// +// Return values: +// - SYMCRYPT_NO_ERROR on success. +// - SYMCRYPT_INCOMPATIBLE_FORMAT if the key format is invalid. +// - SYMCRYPT_INVALID_ARGUMENT if other arguments are invalid. +// - SYMCRYPT_WRONG_KEY_SIZE if cbSrc does not match the expected size for the key format. +// - SYMCRYPT_INVALID_BLOB if the encoded key is invalid. +// - SYMCRYPT_MEMORY_ALLOCATION_FAILURE if memory allocation fails. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsakeyGetValue( + _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_MLDSAKEY_FORMAT mlDsakeyFormat, + UINT32 flags ); +// +// Export key material from an ML-DSA key object to a byte blob. +// +// Parameters: +// - pkMlDsakey: pointer to a valid ML-DSA key object. +// - (pbDst, cbDst): buffer for the exported ML-DSA key, in the format specified by +// mlDsakeyFormat. +// - mlDsakeyFormat: format of the output +// - flags: no flags are currently defined; must be set to 0 +// +// Return values: +// - SYMCRYPT_NO_ERROR on success. +// - SYMCRYPT_INCOMPATIBLE_FORMAT if the key object does not have the information required to export +// the format specified by mlDsakeyFormat. +// - SYMCRYPT_INVALID_ARGUMENT if the output buffer size or other arguments are incorrect.
+// +// Remarks: +// - cbDst must be equal to the cbKeyFormat returned from +// SymCryptMlDsaSizeofKeyFormatFromParams(params, mlDsakeyFormat, &cbKeyFormat), though typically this +// value can be known statically (see definition of SYMCRYPT_MLDSAKEY_FORMAT) +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSign( + _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey, + _In_reads_bytes_( cbMessage ) PCBYTE pbMessage, + SIZE_T cbMessage, + _In_reads_bytes_opt_( cbContext ) PCBYTE pbContext, + _In_range_( 0, SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH ) SIZE_T cbContext, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ); +// +// Sign a message using "pure" ML-DSA. The message can be of arbitrary length. +// +// Parameters: +// - pkMlDsakey: an ML-DSA key object. Must contain the private key material. +// - (pbMessage, cbMessage): the message to sign. May be of arbitrary length. +// - (pbContext, cbContext): an optional context string which will be included in the message +// representative to be signed. Length must be <= SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH. +// - flags: no flags are currently defined; must be set to 0 +// - (pbSignature, cbSignature): the buffer into which the signature is written. +// +// Return values: +// - SYMCRYPT_NO_ERROR on success. +// - SYMCRYPT_INVALID_ARGUMENT if the key object does not contain a private key, or if other +// parameters are invalid. +// - SYMCRYPT_MEMORY_ALLOCATION_FAILURE if memory allocation fails. +// +// Remarks: +// cbSignature must be equal to the signature size returned from +// SymCryptMlDsaSizeofSignatureFromParams( params, &cbSignature ), though typically this +// value can be known statically (see definition of SYMCRYPT_MLDSA_SIGNATURE_SIZE_*).
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptExternalMuMlDsaSign( + _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey, + _In_reads_bytes_( cbMu ) PCBYTE pbMu, + SIZE_T cbMu, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ); +// +// Sign a precomputed message representative Mu. +// +// Parameters: +// - (pbMu, cbMu): the message representative to sign, +// which must be of size 64 (SYMCRYPT_SHAKE256_RESULT_SIZE). +// - All other parameters are the same as for SymCryptMlDsaSign. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHashMlDsaSign( + _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey, + SYMCRYPT_PQDSA_HASH_ID hashAlg, + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_reads_bytes_opt_( cbContext ) PCBYTE pbContext, + _In_range_( 0, SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH ) SIZE_T cbContext, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ); +// +// Sign a message using "pre-hash" ML-DSA. The caller precomputes the hash of the message. +// +// Parameters: +// - hashAlg: the ID of the hash algorithm used to compute pbHash. +// - (pbHash, cbHash): the hash of the message to sign. +// - All other parameters are the same as for SymCryptMlDsaSign. +// +// Return values: +// - SYMCRYPT_NO_ERROR on success. +// - SYMCRYPT_INVALID_ARGUMENT if the key object does not contain a private key, or if other +// parameters are invalid. +// - SYMCRYPT_MEMORY_ALLOCATION_FAILURE if memory allocation fails. +// +// Remarks: +// The hash algorithm provided must meet the minimum required collision strength defined for the +// chosen ML-DSA parameter set. This is the lambda parameter in FIPS 204. 
This means that the +// following hash algorithms are supported: +// +// ML-DSA-44 (lambda = 128): SHA-256, SHA-384, SHA-512, SHA-512/256, SHA3-256, SHA3-384, SHA3-512, SHAKE128, SHAKE256 +// ML-DSA-65 (lambda = 192): SHA-384, SHA-512, SHA3-384, SHA3-512, SHAKE256 +// ML-DSA-87 (lambda = 256): SHA-512, SHA3-512, SHAKE256 +// +// Additionally, cbHash must match the output length of the hash algorithm. +// For XOFs, any output length >= the minimum collision strength is acceptable. If this +// requirement is not met, the function returns SYMCRYPT_INVALID_ARGUMENT. +// +// As with SymCryptMlDsaSign, cbSignature must be equal to the signature size returned from +// SymCryptMlDsaSizeofSignatureFromParams( params, &cbSignature ), though typically this +// value can be known statically (see definition of SYMCRYPT_MLDSA_SIGNATURE_SIZE_*). +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaVerify( + _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey, + _In_reads_bytes_( cbMessage ) PCBYTE pbMessage, + SIZE_T cbMessage, + _In_reads_bytes_opt_( cbContext ) PCBYTE pbContext, + _In_range_( 0, SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH ) SIZE_T cbContext, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + UINT32 flags ); +// +// Verify a signature using "pure" ML-DSA. The message can be of arbitrary length. +// +// Parameters: +// - pkMlDsakey: the ML-DSA key object used to verify the signature. +// - (pbMessage, cbMessage): the message that the signature was generated from. +// - (pbContext, cbContext): an optional context string which will be included in the message +// representative to be verified. Length must be <= SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH. +// - (pbSignature, cbSignature): the signature to verify. +// - flags: no flags are currently defined; must be set to 0 +// +// Return values: +// - SYMCRYPT_NO_ERROR if the signature was verified successfully. +// - SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE if the signature is invalid.
+// - SYMCRYPT_INVALID_ARGUMENT if the parameters are invalid. + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptExternalMuMlDsaVerify( + _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey, + _In_reads_bytes_( cbMu ) PCBYTE pbMu, + SIZE_T cbMu, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + UINT32 flags ); +// +// Verify a signature of a precomputed message representative Mu. +// +// Parameters: +// - (pbMu, cbMu): the message representative that was signed, +// which must be of size 64 (SYMCRYPT_SHAKE256_RESULT_SIZE). +// - All other parameters are the same as for SymCryptMlDsaVerify. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHashMlDsaVerify( + _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey, + SYMCRYPT_PQDSA_HASH_ID hashAlg, + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_reads_bytes_opt_( cbContext ) PCBYTE pbContext, + _In_range_( 0, SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH ) SIZE_T cbContext, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + UINT32 flags ); +// +// Verify a signature using "pre-hash" ML-DSA. The caller precomputes the hash of the message. +// +// Parameters: +// - hashAlg: the ID of the hash algorithm used to compute pbHash. +// - (pbHash, cbHash): the hash of the message that the signature was generated from. +// - All other parameters are the same as for SymCryptMlDsaVerify. +// +// Return values: +// - SYMCRYPT_NO_ERROR if the signature was validated successfully. +// - SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE if the signature is invalid. +// - SYMCRYPT_INVALID_ARGUMENT if the parameters are invalid. +// +// Remarks: +// See the remarks for SymCryptHashMlDsaSign regarding the required security strength of the hash +// algorithm. For unsupported hash algorithms, the function will return SYMCRYPT_INVALID_ARGUMENT. 
+ +VOID +SYMCRYPT_CALL +SymCryptMlDsaSelftest( void ); +// +// FIPS selftest for ML-DSA +// + +_Analysis_noreturn_ +VOID +SYMCRYPT_CALL +SymCryptFatal( UINT32 fatalCode ); +// +// Call the Fatal routine passed to the library upon initialization +// We use the SYMCRYPT_ASSERT macro to catch problems in Debug builds +// + + +typedef struct _SYMCRYPT_UINT32_MAP { + UINT32 from; // map this value... + UINT32 to; // ...into this value +} SYMCRYPT_UINT32_MAP, *PSYMCRYPT_UINT32_MAP; +typedef const SYMCRYPT_UINT32_MAP * PCSYMCRYPT_UINT32_MAP; + + +UINT32 +SYMCRYPT_CALL +SymCryptMapUint32( + UINT32 u32Input, + UINT32 u32Default, + _In_reads_( nMap ) PCSYMCRYPT_UINT32_MAP pcMap, + SIZE_T nMap ); +// +// Map values in a side-channel safe way, typically used for mapping error codes. +// +// (pcMap, nMap) point to an array of nMap entries of type SYMCRYPT_UINT32_MAP; +// each entry specifies a single mapping. If u32Input matches the +// 'from' field, the return value will be the 'to' field value. +// If u32Input is not equal to any 'from' field values, the return value is u32Default. +// Both u32Input and the return value are treated as secrets w.r.t. side channels. +// +// If multiple map entries have the same 'from' field value, then the return value +// is one of the several 'to' field values; which one is not defined. +// +// This function is particularly useful when mapping error codes in situations where +// the actual error cannot be revealed through side channels. 
+ +#if SYMCRYPT_DEBUG +#define SYMCRYPT_ASSERT( _x ) \ + {\ + if( !(_x) ){ SymCryptFatal( 'asrt' ); }\ + }\ + _Analysis_assume_( _x ) +#else +#define SYMCRYPT_ASSERT( _x ) \ + _Analysis_assume_( _x ) +#endif + + +#ifdef __cplusplus +} +#endif diff --git a/libs/symcrypt/inc/symcrypt_internal.h b/libs/symcrypt/inc/symcrypt_internal.h new file mode 100644 index 00000000000..0fe5fe313d3 --- /dev/null +++ b/libs/symcrypt/inc/symcrypt_internal.h @@ -0,0 +1,3768 @@ +// +// SymCrypt_internal.h +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This file contains information that is internal to the symcrypt library, +// but which still needs to be known to the compiler to be able to use the library. +// This includes structure declarations and all support for inline implementations +// of some of the library functions. +// Information in this file is not part of the API and can change at any time. +// + +#pragma GCC diagnostic ignored "-Wunknown-pragmas" + +// +// We use Prefast pragmas, but they are not recognized by the compiler. +// We disable the 'unknown pragma' warning if we are not in prefast mode. +// +#ifndef _PREFAST_ +#pragma warning(disable:4068) +#endif + +//============================================================================================== +// PLATFORM/COMPILER DETECTION +//============================================================================================== + +#define SYMCRYPT_PLATFORM_WINDOWS 0 +#define SYMCRYPT_PLATFORM_APPLE 0 // macOS and other Apple platforms +#define SYMCRYPT_PLATFORM_UNIX 0 // Linux and other Unix-likes, besides macOS. Must support POSIX. 
+ +#if defined(_WIN32) + #undef SYMCRYPT_PLATFORM_WINDOWS + #define SYMCRYPT_PLATFORM_WINDOWS 1 +#elif defined(__APPLE__) + #undef SYMCRYPT_PLATFORM_APPLE + #define SYMCRYPT_PLATFORM_APPLE 1 +#elif (defined(linux) || defined(__unix__)) + #undef SYMCRYPT_PLATFORM_UNIX + #define SYMCRYPT_PLATFORM_UNIX 1 +#endif + +#define SYMCRYPT_MS_VC 0 // Microsoft compiler (cl.exe - Visual Studio/MSBuild) +#define SYMCRYPT_GNUC 0 // GCC and compatible compilers (including Clang) + +#if defined(_MSC_VER) + #undef SYMCRYPT_MS_VC + #define SYMCRYPT_MS_VC 1 +#elif defined(__GNUC__) + #undef SYMCRYPT_GNUC + #define SYMCRYPT_GNUC 1 +#else + #error Unsupported compiler +#endif + +#if SYMCRYPT_MS_VC + +// This should go somewhere else. Same in the other #if branches. +#define SYMCRYPT_ANYSIZE_ARRAY 1 +#define SYMCRYPT_NOINLINE __declspec(noinline) +#define SYMCRYPT_CDECL __cdecl +#define SYMCRYPT_FASTCALL __fastcall + +#define SYMCRYPT_UNALIGNED + +#elif SYMCRYPT_GNUC + +// Ignore the multi-character character constant warnings +#pragma GCC diagnostic ignored "-Wmultichar" +#pragma GCC diagnostic ignored "-Wincompatible-pointer-types" + +#define SYMCRYPT_ANYSIZE_ARRAY 1 +#define SYMCRYPT_NOINLINE __attribute__ ((noinline)) +#define SYMCRYPT_UNALIGNED +#define SYMCRYPT_CDECL +#define SYMCRYPT_FASTCALL __attribute__((fastcall)) + +#endif + +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wmultichar" +#pragma clang diagnostic ignored "-Wincompatible-function-pointer-types" +#pragma clang diagnostic ignored "-Wincompatible-pointer-types-discards-qualifiers" +#endif + +//============================================================================================== +// PLATFORM SPECIFICS +//============================================================================================== + +// +// SYMCRYPT_CALL & SYMCRYPT_ALIGN +// +// SYMCRYPT_CALL is a macro that selects the calling convention used by the library. 
+// Crypto functions often have to perform very many small operations, and a fast calling convention is +// preferable. We use __fastcall on platforms that support it. +// +// SYMCRYPT_ALIGN is the default alignment for the platform. +// On platforms that have alignment restrictions the default alignment should be large enough that +// an aligned BYTE * can be cast to a pointer to a UINT32 and be used. +// +// +// The SYMCRYPT_IGNORE_PLATFORM macro can be defined to switch off any platform-specific +// optimizations and run just the C implementations. +// The rest of the library uses SYMCRYPT_CPU_* macros to make platform decisions. +// +// +// WARNING: both the library and the calling application must be compiled with the same +// set of flags, as the flags affect things like the structure layout and size and +// the calling convention, both of which need to be in sync between the lib and the caller. +// + +//#define SYMCRYPT_IGNORE_PLATFORM // #defining this flag disables all platform optimizations. 
+ +#define SYMCRYPT_CPU_X86 0 +#define SYMCRYPT_CPU_AMD64 0 +#define SYMCRYPT_CPU_ARM 0 +#define SYMCRYPT_CPU_ARM64 0 +#define SYMCRYPT_CPU_UNKNOWN 0 + +#if (defined( _X86_ ) || defined( _M_IX86 ) || defined( __i386__ )) && !defined ( SYMCRYPT_IGNORE_PLATFORM ) + +#undef SYMCRYPT_CPU_X86 +#define SYMCRYPT_CPU_X86 1 + +#define SYMCRYPT_CALL SYMCRYPT_FASTCALL +#define SYMCRYPT_ALIGN_VALUE 4 + +#ifndef _PREFAST_ +#pragma warning(push) +#pragma warning(disable:4359) // *** Alignment specifier is less than actual alignment +#endif + +#elif (defined( _ARM64_ ) || defined( _ARM64EC_ ) || defined( _M_ARM64 ) || defined( __aarch64__ ) || defined(__arm64ec__)) && !defined( SYMCRYPT_IGNORE_PLATFORM ) + +#undef SYMCRYPT_CPU_ARM64 +#define SYMCRYPT_CPU_ARM64 1 +#define SYMCRYPT_CALL +#define SYMCRYPT_ALIGN_VALUE 16 + +#elif (defined( _AMD64_ ) || defined( _M_AMD64 ) || defined( __amd64__ )) && !defined ( SYMCRYPT_IGNORE_PLATFORM ) + +#undef SYMCRYPT_CPU_AMD64 +#define SYMCRYPT_CPU_AMD64 1 + +#define SYMCRYPT_CALL +#define SYMCRYPT_ALIGN_VALUE 16 + +#elif (defined( _ARM_ ) || defined( _M_ARM ) || defined( __arm__ )) && !defined( SYMCRYPT_IGNORE_PLATFORM ) + +#undef SYMCRYPT_CPU_ARM +#define SYMCRYPT_CPU_ARM 1 +#define SYMCRYPT_CALL +#define SYMCRYPT_ALIGN_VALUE 8 + +#elif defined( SYMCRYPT_IGNORE_PLATFORM ) + +#undef SYMCRYPT_CPU_UNKNOWN +#define SYMCRYPT_CPU_UNKNOWN 1 +#define SYMCRYPT_CALL +#define SYMCRYPT_ALIGN_VALUE 16 + +#ifndef _PREFAST_ +#pragma warning(push) +#pragma warning(disable:4359) // *** Alignment specifier is less than actual alignment +#endif + +#else + +#error Unknown CPU platform + +#endif // SYMCRYPT_CALL platforms switch + + +// +// Datatypes used by the SymCrypt library. This ensures compatibility +// with multiple environments, such as Windows, iOS, and Android. 
+// + +#if SYMCRYPT_PLATFORM_WINDOWS + + // + // Types included in intsafe.h: + // BYTE, + // INT16, UINT16, + // INT32, UINT32, + // INT64, UINT64, + // UINT_PTR + // and macro: + // UINT32_MAX + // +#include <intsafe.h> + +#else + +#include <stdint.h> + +typedef uint8_t BYTE; + +#ifndef UINT32_MAX +#define UINT32_MAX (0xffffffff) +#endif + +#ifndef TRUE +#define TRUE 0x01 +#endif + +#ifndef FALSE +#define FALSE 0x00 +#endif + +// Size_t +typedef size_t SIZE_T; + +#ifndef SIZE_T_MAX +#define SIZE_T_MAX SIZE_MAX +#endif + +typedef int BOOL; + +typedef int8_t INT8, *PINT8; +typedef int16_t INT16, *PINT16; +typedef int32_t INT32, *PINT32; +typedef int64_t INT64, *PINT64; +typedef uint8_t UINT8, *PUINT8; +typedef uint16_t UINT16, *PUINT16; +typedef uint32_t UINT32, *PUINT32; +typedef uint64_t UINT64, *PUINT64; + +// minwindef.h +typedef char CHAR; + +#endif //WIN32 + +#include <stddef.h> + +// +// Pointer types +// +typedef BYTE * PBYTE; +typedef const BYTE * PCBYTE; + +typedef UINT16 * PUINT16; +typedef const UINT16 * PCUINT16; + +typedef UINT32 * PUINT32; +typedef const UINT32 * PCUINT32; + +typedef UINT64 * PUINT64; +typedef const UINT64 * PCUINT64; + +// Void + +#ifndef VOID +#define VOID void +#endif + +typedef void * PVOID; +typedef const void * PCVOID; + +// winnt.h +typedef BYTE BOOLEAN; + +// Useful macros for structs +#define SYMCRYPT_FIELD_OFFSET(type, field) (offsetof(type, field)) +#define SYMCRYPT_FIELD_SIZE(type, field) (sizeof( ((type *)0)->field )) + +#if SYMCRYPT_MS_VC + +#ifndef FORCEINLINE +#if (_MSC_VER >= 1200) +#define FORCEINLINE __forceinline +#else +#define FORCEINLINE __inline +#endif +#endif + +#else + +#define FORCEINLINE inline __attribute__((always_inline)) + +#endif + +C_ASSERT( (SYMCRYPT_ALIGN_VALUE & (SYMCRYPT_ALIGN_VALUE - 1 )) == 0 ); +#define SYMCRYPT_ALIGN_UP( _p ) ((PBYTE) ( ((SIZE_T) (_p) + SYMCRYPT_ALIGN_VALUE - 1) & ~(SYMCRYPT_ALIGN_VALUE - 1 ) ) ) + +#if SYMCRYPT_MS_VC + #define SYMCRYPT_ALIGN_AT(alignment) 
__declspec(align(alignment)) + #define SYMCRYPT_WEAK_SYMBOL +#elif SYMCRYPT_GNUC + #define SYMCRYPT_ALIGN_AT(alignment) __attribute__((aligned(alignment))) + #define SYMCRYPT_WEAK_SYMBOL __attribute__((weak)) +#else + #define SYMCRYPT_ALIGN_AT(alignment) + #define SYMCRYPT_WEAK_SYMBOL +#endif +#define SYMCRYPT_ALIGN_TYPE_AT(typename, alignment) typename SYMCRYPT_ALIGN_AT(alignment) +#define SYMCRYPT_ALIGN SYMCRYPT_ALIGN_AT(SYMCRYPT_ALIGN_VALUE) +#define SYMCRYPT_ALIGN_STRUCT SYMCRYPT_ALIGN_TYPE_AT(struct, SYMCRYPT_ALIGN_VALUE) +#define SYMCRYPT_ALIGN_UNION SYMCRYPT_ALIGN_TYPE_AT(union, SYMCRYPT_ALIGN_VALUE) + + +#define SYMCRYPT_MAX( _a, _b ) ((_a)>(_b)?(_a):(_b)) +#define SYMCRYPT_MIN( _a, _b ) ((_a)<(_b)?(_a):(_b)) + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 +// +// XMM related declarations, used in data structures. +// +#pragma prefast(push) +#pragma prefast(disable: 28251, "Windows headers define _mm_clflush with SAL annotation, Intel header doesn't have SAL annotation leading to inconsistent annotation errors") +#include <emmintrin.h> +#pragma prefast(pop) +#endif + + +// +// To provide quick error detection we have magic values in all +// our data structures, but only in CHKed builds. +// Our magic value depends on the address of the structure. +// This has the advantage that we detect blind memcpy's of our data structures. +// Memcpy is not supported as it limits what the library is allowed to do. +// Where needed the library provides for copy functions of its internal data structures. 
+// +#if SYMCRYPT_DEBUG + #define SYMCRYPT_MAGIC_ENABLED +#endif + +#if defined(SYMCRYPT_MAGIC_ENABLED ) + +#define SYMCRYPT_MAGIC_FIELD SIZE_T magic; +#define SYMCRYPT_MAGIC_VALUE( p ) ((SIZE_T) p + 'S1mv' + SYMCRYPT_API_VERSION) + + +#define SYMCRYPT_SET_MAGIC( p ) {(p)->magic = SYMCRYPT_MAGIC_VALUE( p );} +#define SYMCRYPT_CHECK_MAGIC( p ) {if((p)->magic!=SYMCRYPT_MAGIC_VALUE(p)) SymCryptFatal('magc');} +#define SYMCRYPT_WIPE_MAGIC( p ) {(p)->magic = 0;} + +#else + +// +// We define the magic field even for FRE builds, because we get too many +// hard-to-debug problems with people who accidentally mix FRE headers with CHKed libraries, +// or the other way around. +// E.g. BitLocker only publishes the FRE version of their library, and building a CHKed binary with +// that FRE lib crashes +// + +#define SYMCRYPT_MAGIC_FIELD SIZE_T magic; +#define SYMCRYPT_SET_MAGIC( p ) +#define SYMCRYPT_CHECK_MAGIC( p ) +#define SYMCRYPT_WIPE_MAGIC( p ) + +#endif + +// +// CPU feature detection infrastructure +// + +#if !SYMCRYPT_PLATFORM_WINDOWS + // Forward declarations for CPUID intrinsic replacements + void __cpuidex(int CPUInfo[4], int InfoType, int ECXValue); +#endif + +#if SYMCRYPT_CPU_ARM || SYMCRYPT_CPU_ARM64 + +#define SYMCRYPT_CPU_FEATURE_NEON 0x01 +#define SYMCRYPT_CPU_FEATURE_NEON_AES 0x02 +#define SYMCRYPT_CPU_FEATURE_NEON_PMULL 0x04 +#define SYMCRYPT_CPU_FEATURE_NEON_SHA256 0x08 + +#elif SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +// +// We keep the most commonly tested bits in the least significant byte, to make it easier for the compiler to optimize +// There is a many to one relationship between CPUID feature flags and SYMCRYPT_CPU_FEATURE_XXX bits +// since a SYMCRYPT_CPU_FEATURE_XXX could require multiple CPUID features. 
+ +#define SYMCRYPT_CPU_FEATURE_SSE2 0x0001 // includes SSE, SSE2 +#define SYMCRYPT_CPU_FEATURE_SSSE3 0x0002 // includes SSE, SSE2, SSE3, SSSE3 +#define SYMCRYPT_CPU_FEATURE_AESNI 0x0004 +#define SYMCRYPT_CPU_FEATURE_PCLMULQDQ 0x0008 +#define SYMCRYPT_CPU_FEATURE_AVX2 0x0010 // includes AVX, AVX2 - also indicates support for saving/restoring Ymm registers +#define SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL 0x0020 // if SymCryptSaveXmm() will never fail +#define SYMCRYPT_CPU_FEATURE_SHANI 0x0040 +#define SYMCRYPT_CPU_FEATURE_BMI2 0x0080 // MULX, RORX, SARX, SHLX, SHRX + +#define SYMCRYPT_CPU_FEATURE_ADX 0x0100 // ADCX, ADOX +#define SYMCRYPT_CPU_FEATURE_RDRAND 0x0200 +#define SYMCRYPT_CPU_FEATURE_RDSEED 0x0400 +#define SYMCRYPT_CPU_FEATURE_VAES 0x0800 // support for VAES and VPCLMULQDQ (may only be supported on Ymm registers (i.e. Zen3)) +#define SYMCRYPT_CPU_FEATURE_AVX512 0x1000 // includes F, VL, DQ, BW (VL allows AVX-512 instructions to be used on Xmm and Ymm registers) + // also indicates support for saving/restoring additional AVX-512 state + +#define SYMCRYPT_CPU_FEATURE_CMPXCHG16B 0x2000 // Compare and Swap 128b value + +#endif + +typedef UINT32 SYMCRYPT_CPU_FEATURES; + +// +// We have two feature fields. +// g_SymCryptCpuFeaturesNotPresent reports with features are not present on the current CPU +// SymCryptCpuFeaturesNeverPresent() is a function that returns a static (compiler-predictable) value, +// and allows the environment to lock out features in a way that the compiler can optimize away all the code that uses these features. +// Using a function allows the environment macro to forward it to an environment-specific function. 
+// + +extern SYMCRYPT_CPU_FEATURES g_SymCryptCpuFeaturesNotPresent; + +SYMCRYPT_CPU_FEATURES +SYMCRYPT_CALL +SymCryptCpuFeaturesNeverPresent(void); + +#define SYMCRYPT_CPU_FEATURES_PRESENT( x ) ( ((x) & SymCryptCpuFeaturesNeverPresent()) == 0 && ( (x) & g_SymCryptCpuFeaturesNotPresent ) == 0 ) + +// +// VOLATILE MEMORY ACCESS +// +// These macros are used to explicitly handle volatile memory access independent of compiler settings. +// If volatile memory is accessed directly without using the appropriate macro, MSVC may emit warning +// C4746, because the volatile semantics depend on the value of the /volatile flag, which can result in +// undesired hardware memory barriers that impact performance. +// +// More info: +// https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compil... +// https://docs.microsoft.com/en-us/cpp/build/reference/volatile-volatile-keywo... +// + +#if SYMCRYPT_MS_VC // Microsoft VC++ Compiler + + #if SYMCRYPT_CPU_ARM || SYMCRYPT_CPU_ARM64 + #define SYMCRYPT_INTERNAL_VOLATILE_READ8( _p ) ( __iso_volatile_load8( (const volatile char*)(_p) ) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ16( _p ) ( __iso_volatile_load16( (const volatile short*)(_p) ) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ32( _p ) ( __iso_volatile_load32( (const volatile int*)(_p) ) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ64( _p ) ( __iso_volatile_load64( (const volatile __int64*)(_p) ) ) + + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE8( _p, _v ) ( __iso_volatile_store8( (volatile char*)(_p), (_v) ) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE16( _p, _v ) ( __iso_volatile_store16( (volatile short*)(_p), (_v) ) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE32( _p, _v ) ( __iso_volatile_store32( (volatile int*)(_p), (_v) ) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE64( _p, _v ) ( __iso_volatile_store64( (volatile __int64*)(_p), (_v) ) ) + #elif SYMCRYPT_CPU_X86 || SYMCRYPT_CPU_AMD64 + #define SYMCRYPT_INTERNAL_VOLATILE_READ8( _p ) ( *((const volatile BYTE*) 
(_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ16( _p ) ( *((const volatile UINT16*)(_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ32( _p ) ( *((const volatile UINT32*)(_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ64( _p ) ( *((const volatile UINT64*)(_p)) ) + + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE8( _p, _v ) ( *((volatile BYTE*) (_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE16( _p, _v ) ( *((volatile UINT16*)(_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE32( _p, _v ) ( *((volatile UINT32*)(_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE64( _p, _v ) ( *((volatile UINT64*)(_p)) = (_v) ) + #else // Temporary workaround for CMake compilation issues on Windows. Assume X86/ADM64. + #define SYMCRYPT_INTERNAL_VOLATILE_READ8( _p ) ( *((const volatile BYTE*) (_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ16( _p ) ( *((const volatile UINT16*)(_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ32( _p ) ( *((const volatile UINT32*)(_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ64( _p ) ( *((const volatile UINT64*)(_p)) ) + + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE8( _p, _v ) ( *((volatile BYTE*) (_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE16( _p, _v ) ( *((volatile UINT16*)(_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE32( _p, _v ) ( *((volatile UINT32*)(_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE64( _p, _v ) ( *((volatile UINT64*)(_p)) = (_v) ) + #endif + +#elif SYMCRYPT_GNUC + + #if !SYMCRYPT_CPU_ARM + #define SYMCRYPT_INTERNAL_VOLATILE_READ8( _p ) ( *((const volatile BYTE*) (_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ16( _p ) ( *((const volatile UINT16*)(_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ32( _p ) ( *((const volatile UINT32*)(_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ64( _p ) ( *((const volatile UINT64*)(_p)) ) + + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE8( _p, _v ) ( *((volatile BYTE*) (_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE16( _p, _v ) ( *((volatile 
UINT16*)(_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE32( _p, _v ) ( *((volatile UINT32*)(_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE64( _p, _v ) ( *((volatile UINT64*)(_p)) = (_v) ) + #else // SYMCRYPT_CPU_ARM + #define SYMCRYPT_INTERNAL_VOLATILE_READ8( _p ) ( *((const volatile BYTE*) (_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ16( _p ) ( *((const volatile UINT16*)(_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ32( _p ) ( *((const volatile UINT32*)(_p)) ) + #define SYMCRYPT_INTERNAL_VOLATILE_READ64( p ) ( (UINT64)SYMCRYPT_INTERNAL_VOLATILE_READ32(&((PBYTE)p)[4]) << 32 | SYMCRYPT_INTERNAL_VOLATILE_READ32(&((PBYTE)p)[0]) ) + + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE8( _p, _v ) ( *((volatile BYTE*) (_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE16( _p, _v ) ( *((volatile UINT16*)(_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE32( _p, _v ) ( *((volatile UINT32*)(_p)) = (_v) ) + #define SYMCRYPT_INTERNAL_VOLATILE_WRITE64( p, x ) { \ + SYMCRYPT_INTERNAL_VOLATILE_WRITE32( &((PBYTE)p)[0], (UINT32)((x) ) );\ + SYMCRYPT_INTERNAL_VOLATILE_WRITE32( &((PBYTE)p)[4], (UINT32)(((UINT64)(x))>>32) );\ + } + #endif + +#else + + #error Unknown compiler + +#endif + +// +// FORCED MEMORY ACCESS +// +// These macros force a memory access. That is, they require that the memory +// read or write takes place, and do not allow the compiler to optimize the access +// away. +// They provide no other memory ordering requirements, so there are no acquire/release +// semantics, memory barriers, etc. +// +// The generic versions are implemented with a volatile access, but that is inefficient on some platforms +// because it might introduce memory ordering requirements. 
//

#define SYMCRYPT_INTERNAL_FORCE_READ8( _p )     SYMCRYPT_INTERNAL_VOLATILE_READ8( _p )
#define SYMCRYPT_INTERNAL_FORCE_READ16( _p )    SYMCRYPT_INTERNAL_VOLATILE_READ16( _p )
#define SYMCRYPT_INTERNAL_FORCE_READ32( _p )    SYMCRYPT_INTERNAL_VOLATILE_READ32( _p )
#define SYMCRYPT_INTERNAL_FORCE_READ64( _p )    SYMCRYPT_INTERNAL_VOLATILE_READ64( _p )

#define SYMCRYPT_INTERNAL_FORCE_WRITE8( _p, _v )    SYMCRYPT_INTERNAL_VOLATILE_WRITE8( _p, _v )
#define SYMCRYPT_INTERNAL_FORCE_WRITE16( _p, _v )   SYMCRYPT_INTERNAL_VOLATILE_WRITE16( _p, _v )
#define SYMCRYPT_INTERNAL_FORCE_WRITE32( _p, _v )   SYMCRYPT_INTERNAL_VOLATILE_WRITE32( _p, _v )
#define SYMCRYPT_INTERNAL_FORCE_WRITE64( _p, _v )   SYMCRYPT_INTERNAL_VOLATILE_WRITE64( _p, _v )

//
// FIXED ENDIANNESS ACCESS
//
// Fixed endianness load and store
// We do this by platform because it is affected by both endianness and alignment requirements
// The p pointer is always a pointer to BYTE
//
// In every branch the pointer argument is parenthesized before it is cast, and the multi-statement
// store macros are wrapped in do { } while( 0 ), so each macro can be used as a single statement
// followed by ';' (for example as the body of an if/else) on all platforms.
//
#if SYMCRYPT_MS_VC  // Microsoft VC++ Compiler
    #define SYMCRYPT_BSWAP16( x ) _byteswap_ushort(x)
    #define SYMCRYPT_BSWAP32( x ) _byteswap_ulong(x)
    #define SYMCRYPT_BSWAP64( x ) _byteswap_uint64(x)
#elif SYMCRYPT_GNUC
    #define SYMCRYPT_BSWAP16( x ) __builtin_bswap16(x)
    #define SYMCRYPT_BSWAP32( x ) __builtin_bswap32(x)
    #define SYMCRYPT_BSWAP64( x ) __builtin_bswap64(x)
#endif

#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64


//
// X86, AMD64, and ARM64 have no alignment restrictions, and are little-endian.
// (ARM is handled by its own branch below because its 64-bit accesses need to be aligned.)
// We do straight store/loads with BSWAPs where required.
// This technically relies on undefined behavior, as we assume the compiler will translate
// operations on unaligned pointers to 2, 4, and 8 bytes types to appropriately unaligned store/load
// instructions on these platforms (not just in these macros). This works for all compilers we
// currently use.
//
#define SYMCRYPT_INTERNAL_LOAD_MSBFIRST16( p ) SYMCRYPT_BSWAP16( *((UINT16 *)(p)) )
#define SYMCRYPT_INTERNAL_LOAD_LSBFIRST16( p )                 ( *((UINT16 *)(p)) )
#define SYMCRYPT_INTERNAL_LOAD_MSBFIRST32( p ) SYMCRYPT_BSWAP32( *((UINT32 *)(p)) )
#define SYMCRYPT_INTERNAL_LOAD_LSBFIRST32( p )                 ( *((UINT32 *)(p)) )
#define SYMCRYPT_INTERNAL_LOAD_MSBFIRST64( p ) SYMCRYPT_BSWAP64( *((UINT64 *)(p)) )
#define SYMCRYPT_INTERNAL_LOAD_LSBFIRST64( p )                 ( *((UINT64 *)(p)) )

#define SYMCRYPT_INTERNAL_STORE_MSBFIRST16( p, x ) ( *(UINT16 *)(p) = SYMCRYPT_BSWAP16(x) )
#define SYMCRYPT_INTERNAL_STORE_LSBFIRST16( p, x ) ( *(UINT16 *)(p) = (x) )
#define SYMCRYPT_INTERNAL_STORE_MSBFIRST32( p, x ) ( *(UINT32 *)(p) = SYMCRYPT_BSWAP32(x) )
#define SYMCRYPT_INTERNAL_STORE_LSBFIRST32( p, x ) ( *(UINT32 *)(p) = (x) )
#define SYMCRYPT_INTERNAL_STORE_MSBFIRST64( p, x ) ( *(UINT64 *)(p) = SYMCRYPT_BSWAP64(x) )
#define SYMCRYPT_INTERNAL_STORE_LSBFIRST64( p, x ) ( *(UINT64 *)(p) = (x) )

#elif SYMCRYPT_CPU_ARM

//
// Only 64 bit accesses need to be aligned.
// 64-bit loads and stores are therefore built from two 32-bit accesses.
//
#define SYMCRYPT_INTERNAL_LOAD_MSBFIRST16( p ) SYMCRYPT_BSWAP16( *((UINT16 *)(p)) )
#define SYMCRYPT_INTERNAL_LOAD_LSBFIRST16( p )                 ( *((UINT16 *)(p)) )
#define SYMCRYPT_INTERNAL_LOAD_MSBFIRST32( p ) SYMCRYPT_BSWAP32( *((UINT32 *)(p)) )
#define SYMCRYPT_INTERNAL_LOAD_LSBFIRST32( p )                 ( *((UINT32 *)(p)) )

#define SYMCRYPT_INTERNAL_LOAD_MSBFIRST64( p ) ( (UINT64)SYMCRYPT_INTERNAL_LOAD_MSBFIRST32(&((PBYTE)(p))[0]) << 32 | SYMCRYPT_INTERNAL_LOAD_MSBFIRST32(&((PBYTE)(p))[4]) )
#define SYMCRYPT_INTERNAL_LOAD_LSBFIRST64( p ) ( (UINT64)SYMCRYPT_INTERNAL_LOAD_LSBFIRST32(&((PBYTE)(p))[4]) << 32 | SYMCRYPT_INTERNAL_LOAD_LSBFIRST32(&((PBYTE)(p))[0]) )



#define SYMCRYPT_INTERNAL_STORE_MSBFIRST16( p, x ) ( *(UINT16 *)(p) = SYMCRYPT_BSWAP16(x) )
#define SYMCRYPT_INTERNAL_STORE_LSBFIRST16( p, x ) ( *(UINT16 *)(p) = (x) )
#define SYMCRYPT_INTERNAL_STORE_MSBFIRST32( p, x ) ( *(UINT32 *)(p) = SYMCRYPT_BSWAP32(x) )
#define SYMCRYPT_INTERNAL_STORE_LSBFIRST32( p, x ) ( *(UINT32 *)(p) = (x) )

// Note: p and x are evaluated twice by the 64-bit stores.
#define SYMCRYPT_INTERNAL_STORE_MSBFIRST64( p, x ) do { \
    SYMCRYPT_INTERNAL_STORE_MSBFIRST32( &((PBYTE)(p))[0],(UINT32)(((UINT64)(x))>>32) );\
    SYMCRYPT_INTERNAL_STORE_MSBFIRST32( &((PBYTE)(p))[4],(UINT32)(x));\
    } while( 0 )

#define SYMCRYPT_INTERNAL_STORE_LSBFIRST64( p, x ) do { \
    SYMCRYPT_INTERNAL_STORE_LSBFIRST32( &((PBYTE)(p))[0], (UINT32)((x)                ) );\
    SYMCRYPT_INTERNAL_STORE_LSBFIRST32( &((PBYTE)(p))[4], (UINT32)(((UINT64)(x))>>32) );\
    } while( 0 )

#else // unknown platform

//
// These functions have to handle arbitrary alignments too, so we do them byte-by-byte in the
// generic case.
// So far these macros have not been fully tested
//
// Note: p and x are evaluated more than once by these macros.
//
#define SYMCRYPT_INTERNAL_LOAD_MSBFIRST16( p ) ( ((UINT16)((PBYTE)(p))[0]) << 8 | ((PBYTE)(p))[1] )
#define SYMCRYPT_INTERNAL_LOAD_LSBFIRST16( p ) ( ((UINT16)((PBYTE)(p))[1]) << 8 | ((PBYTE)(p))[0] )
#define SYMCRYPT_INTERNAL_LOAD_MSBFIRST32( p ) ( (UINT32)SYMCRYPT_INTERNAL_LOAD_MSBFIRST16(&((PBYTE)(p))[0]) << 16 | SYMCRYPT_INTERNAL_LOAD_MSBFIRST16(&((PBYTE)(p))[2]) )
#define SYMCRYPT_INTERNAL_LOAD_LSBFIRST32( p ) ( (UINT32)SYMCRYPT_INTERNAL_LOAD_LSBFIRST16(&((PBYTE)(p))[2]) << 16 | SYMCRYPT_INTERNAL_LOAD_LSBFIRST16(&((PBYTE)(p))[0]) )
#define SYMCRYPT_INTERNAL_LOAD_MSBFIRST64( p ) ( (UINT64)SYMCRYPT_INTERNAL_LOAD_MSBFIRST32(&((PBYTE)(p))[0]) << 32 | SYMCRYPT_INTERNAL_LOAD_MSBFIRST32(&((PBYTE)(p))[4]) )
#define SYMCRYPT_INTERNAL_LOAD_LSBFIRST64( p ) ( (UINT64)SYMCRYPT_INTERNAL_LOAD_LSBFIRST32(&((PBYTE)(p))[4]) << 32 | SYMCRYPT_INTERNAL_LOAD_LSBFIRST32(&((PBYTE)(p))[0]) )

#define SYMCRYPT_INTERNAL_STORE_MSBFIRST16( p, x ) do { \
    ((PBYTE)(p))[0] = (BYTE)((x)>> 8);\
    ((PBYTE)(p))[1] = (BYTE)((x)    );\
    } while( 0 )

#define SYMCRYPT_INTERNAL_STORE_LSBFIRST16( p, x ) do { \
    ((PBYTE)(p))[0] = (BYTE)((x)    );\
    ((PBYTE)(p))[1] = (BYTE)((x)>> 8);\
    } while( 0 )

#define SYMCRYPT_INTERNAL_STORE_MSBFIRST32( p, x ) do { \
    ((PBYTE)(p))[0] = (BYTE)((x)>>24);\
    ((PBYTE)(p))[1] = (BYTE)((x)>>16);\
    ((PBYTE)(p))[2] = (BYTE)((x)>> 8);\
    ((PBYTE)(p))[3] = (BYTE)((x)    );\
    } while( 0 )

#define SYMCRYPT_INTERNAL_STORE_LSBFIRST32( p, x ) do { \
    ((PBYTE)(p))[0] = (BYTE)((x)    );\
    ((PBYTE)(p))[1] = (BYTE)((x)>> 8);\
    ((PBYTE)(p))[2] = (BYTE)((x)>>16);\
    ((PBYTE)(p))[3] = (BYTE)((x)>>24);\
    } while( 0 )

#define SYMCRYPT_INTERNAL_STORE_MSBFIRST64( p, x ) do { \
    SYMCRYPT_INTERNAL_STORE_MSBFIRST32( &((PBYTE)(p))[0],(UINT32)(((UINT64)(x))>>32) );\
    SYMCRYPT_INTERNAL_STORE_MSBFIRST32( &((PBYTE)(p))[4],(UINT32)(x));\
    } while( 0 )

#define SYMCRYPT_INTERNAL_STORE_LSBFIRST64( p, x ) do { \
    SYMCRYPT_INTERNAL_STORE_LSBFIRST32( &((PBYTE)(p))[0], (UINT32)((x)                ) );\
    SYMCRYPT_INTERNAL_STORE_LSBFIRST32( &((PBYTE)(p))[4], (UINT32)(((UINT64)(x))>>32) );\
    } while( 0 )

#endif // platform switch for load/store macros


//==============================================================================================
//  INTERNAL DATA STRUCTURES
//==============================================================================================
//
// Note: we do not use the symbolic names like SYMCRYPT_SHA1_INPUT_BLOCK_SIZE as this
// file is included before that name is defined. Fixing that would make the public API header
// file harder to read by moving the constant away from the associated functions, or forcing
// the header file to use the struct name rather than the typedef. The current solution
// works quite well.
//

//-----------------------------------------------------------------
//     Block cipher description table
//     Below are the typedefs for the block cipher description table type
//     Callers can use this to define their own block cipher and use the block cipher
//     modes.
//

typedef struct _SYMCRYPT_BLOCKCIPHER  SYMCRYPT_BLOCKCIPHER, *PSYMCRYPT_BLOCKCIPHER;
typedef const SYMCRYPT_BLOCKCIPHER  * PCSYMCRYPT_BLOCKCIPHER;

//
// Note that blockSize must be <= 32 and must be a power of two. This is true for all the block ciphers
// implemented in SymCrypt.
//

//
// HASH STATES
//
// All hash states have the same basic structure. This allows all hash implementations to share
// the same buffer management code. Some algorithms might still have optimized buffer management code
// specific for their algorithm, but most algs use the generic code.
// This is especially important for parallel hashing, where the buffer management & parallel organizational
// code are tightly coupled.
+// + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_COMMON_HASH_STATE +{ + UINT32 bytesInBuffer; + SYMCRYPT_MAGIC_FIELD + UINT64 dataLengthL; // lower part of msg length + UINT64 dataLengthH; // upper part of msg length + SYMCRYPT_ALIGN BYTE buffer[SYMCRYPT_ANYSIZE_ARRAY]; // Size depends on algorithm + // ... + // Chaining state // type/location depends on algorithm + // +} SYMCRYPT_COMMON_HASH_STATE, *PSYMCRYPT_COMMON_HASH_STATE; + + +// +// SYMCRYPT_MD2_STATE +// +// Data structure that stores the state of an ongoing MD2 computation. +// +// The field names are from RFC 1319. +// It would be more efficient to store only the first 16 bytes of the X array, +// but that would complicate the code and MD2 isn't important enough to add +// extra complications. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_MD2_CHAINING_STATE +{ + SYMCRYPT_ALIGN BYTE C[16]; // State for internal checksum computation + BYTE X[48]; // State for actual hash chaining +} SYMCRYPT_MD2_CHAINING_STATE, *PSYMCRYPT_MD2_CHAINING_STATE; + +// +// MD2 hash computation state. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_MD2_STATE +{ + UINT32 bytesInBuffer; + SYMCRYPT_MAGIC_FIELD + UINT64 dataLengthL; // lower part of msg length + UINT64 dataLengthH; // upper part of msg length + SYMCRYPT_ALIGN BYTE buffer[16]; // buffer to keep one input block in + SYMCRYPT_MD2_CHAINING_STATE chain; +} SYMCRYPT_MD2_STATE, *PSYMCRYPT_MD2_STATE; +typedef const SYMCRYPT_MD2_STATE *PCSYMCRYPT_MD2_STATE; + +// +// SYMCRYPT_MD4_STATE +// +// Data structure that stores the state of an ongoing MD4 computation. +// The buffer contains dataLength % 64 bytes of data. 
+// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_MD4_CHAINING_STATE +{ + UINT32 H[4]; +} SYMCRYPT_MD4_CHAINING_STATE, *PSYMCRYPT_MD4_CHAINING_STATE; + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_MD4_STATE +{ + UINT32 bytesInBuffer; + SYMCRYPT_MAGIC_FIELD + UINT64 dataLengthL; // lower part of msg length + UINT64 dataLengthH; // upper part of msg length + SYMCRYPT_ALIGN BYTE buffer[64]; // buffer to keep one input block in + SYMCRYPT_MD4_CHAINING_STATE chain; // chaining state +} SYMCRYPT_MD4_STATE, *PSYMCRYPT_MD4_STATE; +typedef const SYMCRYPT_MD4_STATE *PCSYMCRYPT_MD4_STATE; + + +// +// SYMCRYPT_MD5_STATE +// +// Data structure that stores the state of an ongoing MD5 computation. +// The buffer contains dataLength % 64 bytes of data. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_MD5_CHAINING_STATE +{ + UINT32 H[4]; +} SYMCRYPT_MD5_CHAINING_STATE, *PSYMCRYPT_MD5_CHAINING_STATE; + + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_MD5_STATE +{ + UINT32 bytesInBuffer; + SYMCRYPT_MAGIC_FIELD + UINT64 dataLengthL; // lower part of msg length + UINT64 dataLengthH; // upper part of msg length + SYMCRYPT_ALIGN BYTE buffer[64]; // buffer to keep one input block in + SYMCRYPT_MD5_CHAINING_STATE chain; // chaining state +} SYMCRYPT_MD5_STATE, *PSYMCRYPT_MD5_STATE; +typedef const SYMCRYPT_MD5_STATE *PCSYMCRYPT_MD5_STATE; + + +// +// SYMCRYPT_SHA1_STATE +// +// Data structure that stores the state of an ongoing SHA1 computation. +// The buffer contains dataLength % 64 bytes of data. 
+// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA1_CHAINING_STATE +{ + UINT32 H[5]; +} SYMCRYPT_SHA1_CHAINING_STATE, *PSYMCRYPT_SHA1_CHAINING_STATE; + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA1_STATE +{ + UINT32 bytesInBuffer; + SYMCRYPT_MAGIC_FIELD + UINT64 dataLengthL; // lower part of msg length + UINT64 dataLengthH; // upper part of msg length + SYMCRYPT_ALIGN BYTE buffer[64]; // buffer to keep one input block in + SYMCRYPT_SHA1_CHAINING_STATE chain; // chaining state +} SYMCRYPT_SHA1_STATE, *PSYMCRYPT_SHA1_STATE; +typedef const SYMCRYPT_SHA1_STATE *PCSYMCRYPT_SHA1_STATE; + + +// +// SYMCRYPT_SHA256_STATE +// +// Data structure that stores the state of an ongoing SHA256 computation. +// The buffer contains dataLength % 64 bytes of data. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA256_CHAINING_STATE +{ + SYMCRYPT_ALIGN UINT32 H[8]; +} SYMCRYPT_SHA256_CHAINING_STATE, * PSYMCRYPT_SHA256_CHAINING_STATE; + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA256_STATE +{ + UINT32 bytesInBuffer; + SYMCRYPT_MAGIC_FIELD + UINT64 dataLengthL; // lower part of msg length + UINT64 dataLengthH; // upper part of msg length + SYMCRYPT_ALIGN BYTE buffer[64]; // buffer to keep one input block in + SYMCRYPT_SHA256_CHAINING_STATE chain; // chaining state +} SYMCRYPT_SHA256_STATE, *PSYMCRYPT_SHA256_STATE; +typedef const SYMCRYPT_SHA256_STATE *PCSYMCRYPT_SHA256_STATE; + + +// +// SYMCRYPT_SHA224_STATE +// +// This is identical to the SHA256 state. 
+// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA224_STATE +{ + UINT32 bytesInBuffer; + SYMCRYPT_MAGIC_FIELD + UINT64 dataLengthL; // lower part of msg length + UINT64 dataLengthH; // upper part of msg length + SYMCRYPT_ALIGN BYTE buffer[64]; // buffer to keep one input block in + SYMCRYPT_SHA256_CHAINING_STATE chain; // chaining state +} SYMCRYPT_SHA224_STATE, *PSYMCRYPT_SHA224_STATE; +typedef const SYMCRYPT_SHA224_STATE *PCSYMCRYPT_SHA224_STATE; + + +// +// SYMCRYPT_SHA512_STATE +// +// Data structure that stores the state of an ongoing SHA512 computation. +// The buffer contains dataLength % 128 bytes of data. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA512_CHAINING_STATE +{ + UINT64 H[8]; +} SYMCRYPT_SHA512_CHAINING_STATE, *PSYMCRYPT_SHA512_CHAINING_STATE; + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA512_STATE +{ + UINT32 bytesInBuffer; + SYMCRYPT_MAGIC_FIELD + UINT64 dataLengthL; // lower part of msg length + UINT64 dataLengthH; // upper part of msg length + SYMCRYPT_ALIGN BYTE buffer[128]; // buffer to keep one input block in + SYMCRYPT_SHA512_CHAINING_STATE chain; // chaining state +} SYMCRYPT_SHA512_STATE, *PSYMCRYPT_SHA512_STATE; +typedef const SYMCRYPT_SHA512_STATE *PCSYMCRYPT_SHA512_STATE; + + +// +// SYMCRYPT_SHA384_STATE +// +// This is identical to the SHA512. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA384_STATE +{ + UINT32 bytesInBuffer; + SYMCRYPT_MAGIC_FIELD + UINT64 dataLengthL; // lower part of msg length + UINT64 dataLengthH; // upper part of msg length + SYMCRYPT_ALIGN BYTE buffer[128]; // buffer to keep one input block in + SYMCRYPT_SHA512_CHAINING_STATE chain; // chaining state +} SYMCRYPT_SHA384_STATE, *PSYMCRYPT_SHA384_STATE; +typedef const SYMCRYPT_SHA384_STATE *PCSYMCRYPT_SHA384_STATE; + + +// +// SYMCRYPT_SHA512_224_STATE +// +// This is identical to the SHA512. 
+// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA512_224_STATE +{ + UINT32 bytesInBuffer; + SYMCRYPT_MAGIC_FIELD + UINT64 dataLengthL; // lower part of msg length + UINT64 dataLengthH; // upper part of msg length + SYMCRYPT_ALIGN BYTE buffer[128]; // buffer to keep one input block in + SYMCRYPT_SHA512_CHAINING_STATE chain; // chaining state +} SYMCRYPT_SHA512_224_STATE, *PSYMCRYPT_SHA512_224_STATE; +typedef const SYMCRYPT_SHA512_224_STATE *PCSYMCRYPT_SHA512_224_STATE; + + +// +// SYMCRYPT_SHA512_256_STATE +// +// This is identical to the SHA512. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA512_256_STATE +{ + UINT32 bytesInBuffer; + SYMCRYPT_MAGIC_FIELD + UINT64 dataLengthL; // lower part of msg length + UINT64 dataLengthH; // upper part of msg length + SYMCRYPT_ALIGN BYTE buffer[128]; // buffer to keep one input block in + SYMCRYPT_SHA512_CHAINING_STATE chain; // chaining state +} SYMCRYPT_SHA512_256_STATE, *PSYMCRYPT_SHA512_256_STATE; +typedef const SYMCRYPT_SHA512_256_STATE *PCSYMCRYPT_SHA512_256_STATE; + + +// +// SYMCRYPT_KECCAK_STATE +// +// Data structure that stores the state of an ongoing SHA-3 derived algorithm computation. +// + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_KECCAK_STATE +{ + SYMCRYPT_ALIGN UINT64 state[25]; // state for Keccak-f[1600] permutation + UINT32 inputBlockSize; // rate + UINT32 stateIndex; // position in the state for next merge/extract operation + UINT8 paddingValue; // Keccak padding value + BOOLEAN squeezeMode; // denotes whether the state is in squeeze mode +} SYMCRYPT_KECCAK_STATE, *PSYMCRYPT_KECCAK_STATE; +typedef const SYMCRYPT_KECCAK_STATE *PCSYMCRYPT_KECCAK_STATE; + +// +// SYMCRYPT_SHA3_224_STATE +// +// Data structure that stores the state of an ongoing SHA3-224 computation. 
+// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA3_224_STATE +{ + SYMCRYPT_KECCAK_STATE ks; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_SHA3_224_STATE, * PSYMCRYPT_SHA3_224_STATE; +typedef const SYMCRYPT_SHA3_224_STATE* PCSYMCRYPT_SHA3_224_STATE; + +// +// SYMCRYPT_SHA3_256_STATE +// +// Data structure that stores the state of an ongoing SHA3-256 computation. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA3_256_STATE +{ + SYMCRYPT_KECCAK_STATE ks; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_SHA3_256_STATE, * PSYMCRYPT_SHA3_256_STATE; +typedef const SYMCRYPT_SHA3_256_STATE* PCSYMCRYPT_SHA3_256_STATE; + +// +// SYMCRYPT_SHA3_384_STATE +// +// Data structure that stores the state of an ongoing SHA3-384 computation. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA3_384_STATE +{ + SYMCRYPT_KECCAK_STATE ks; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_SHA3_384_STATE, * PSYMCRYPT_SHA3_384_STATE; +typedef const SYMCRYPT_SHA3_384_STATE* PCSYMCRYPT_SHA3_384_STATE; + +// +// SYMCRYPT_SHA3_512_STATE +// +// Data structure that stores the state of an ongoing SHA3-512 computation. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHA3_512_STATE +{ + SYMCRYPT_KECCAK_STATE ks; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_SHA3_512_STATE, * PSYMCRYPT_SHA3_512_STATE; +typedef const SYMCRYPT_SHA3_512_STATE* PCSYMCRYPT_SHA3_512_STATE; + +// +// SYMCRYPT_SHAKE128_STATE +// +// Data structure that stores the state of an ongoing SHAKE128 computation. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHAKE128_STATE +{ + SYMCRYPT_KECCAK_STATE ks; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_SHAKE128_STATE, * PSYMCRYPT_SHAKE128_STATE; +typedef const SYMCRYPT_SHAKE128_STATE* PCSYMCRYPT_SHAKE128_STATE; + +// +// SYMCRYPT_SHAKE256_STATE +// +// Data structure that stores the state of an ongoing SHAKE256 computation. 
+// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_SHAKE256_STATE +{ + SYMCRYPT_KECCAK_STATE ks; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_SHAKE256_STATE, * PSYMCRYPT_SHAKE256_STATE; +typedef const SYMCRYPT_SHAKE256_STATE* PCSYMCRYPT_SHAKE256_STATE; + +// +// SYMCRYPT_CSHAKE128_STATE +// +// Data structure that stores the state of an ongoing CSHAKE128 computation. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_CSHAKE128_STATE +{ + SYMCRYPT_KECCAK_STATE ks; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_CSHAKE128_STATE, * PSYMCRYPT_CSHAKE128_STATE; +typedef const SYMCRYPT_CSHAKE128_STATE* PCSYMCRYPT_CSHAKE128_STATE; + +// +// SYMCRYPT_CSHAKE256_STATE +// +// Data structure that stores the state of an ongoing CSHAKE256 computation. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_CSHAKE256_STATE +{ + SYMCRYPT_KECCAK_STATE ks; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_CSHAKE256_STATE, * PSYMCRYPT_CSHAKE256_STATE; +typedef const SYMCRYPT_CSHAKE256_STATE* PCSYMCRYPT_CSHAKE256_STATE; + +// +// SYMCRYPT_KMAC128_EXPANDED_KEY +// +// Data structure that stores the expanded key for KMAC128. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_KMAC128_EXPANDED_KEY +{ + SYMCRYPT_KECCAK_STATE ks; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_KMAC128_EXPANDED_KEY, * PSYMCRYPT_KMAC128_EXPANDED_KEY; +typedef const SYMCRYPT_KMAC128_EXPANDED_KEY* PCSYMCRYPT_KMAC128_EXPANDED_KEY; + +// +// SYMCRYPT_KMAC128_STATE +// +// Data structure that stores the state of an ongoing KMAC128 computation. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_KMAC128_STATE +{ + SYMCRYPT_KECCAK_STATE ks; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_KMAC128_STATE, * PSYMCRYPT_KMAC128_STATE; +typedef const SYMCRYPT_KMAC128_STATE* PCSYMCRYPT_KMAC128_STATE; + +// +// SYMCRYPT_KMAC256_EXPANDED_KEY +// +// Data structure that stores the expanded key for KMAC256. 
+// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_KMAC256_EXPANDED_KEY +{ + SYMCRYPT_KECCAK_STATE ks; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_KMAC256_EXPANDED_KEY, * PSYMCRYPT_KMAC256_EXPANDED_KEY; +typedef const SYMCRYPT_KMAC256_EXPANDED_KEY* PCSYMCRYPT_KMAC256_EXPANDED_KEY; + +// +// SYMCRYPT_KMAC256_STATE +// +// Data structure that stores the state of an ongoing KMAC256 computation. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_KMAC256_STATE +{ + SYMCRYPT_KECCAK_STATE ks; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_KMAC256_STATE, * PSYMCRYPT_KMAC256_STATE; +typedef const SYMCRYPT_KMAC256_STATE* PCSYMCRYPT_KMAC256_STATE; + + +// +// Generic hashing +// + +typedef struct _SYMCRYPT_OID { + UINT32 cbOID; + _Field_size_( cbOID ) PCBYTE pbOID; +} SYMCRYPT_OID, *PSYMCRYPT_OID; +typedef const SYMCRYPT_OID *PCSYMCRYPT_OID; + +// +// OID lists for the most commonly used hash functions +// + +#define SYMCRYPT_MD5_OID_COUNT (2) +extern const SYMCRYPT_OID SymCryptMd5OidList[SYMCRYPT_MD5_OID_COUNT]; + +#define SYMCRYPT_SHA1_OID_COUNT (2) +extern const SYMCRYPT_OID SymCryptSha1OidList[SYMCRYPT_SHA1_OID_COUNT]; + +#define SYMCRYPT_SHA224_OID_COUNT (2) +extern const SYMCRYPT_OID SymCryptSha224OidList[SYMCRYPT_SHA224_OID_COUNT]; + +#define SYMCRYPT_SHA256_OID_COUNT (2) +extern const SYMCRYPT_OID SymCryptSha256OidList[SYMCRYPT_SHA256_OID_COUNT]; + +#define SYMCRYPT_SHA384_OID_COUNT (2) +extern const SYMCRYPT_OID SymCryptSha384OidList[SYMCRYPT_SHA384_OID_COUNT]; + +#define SYMCRYPT_SHA512_OID_COUNT (2) +extern const SYMCRYPT_OID SymCryptSha512OidList[SYMCRYPT_SHA512_OID_COUNT]; + +#define SYMCRYPT_SHA512_224_OID_COUNT (2) +extern const SYMCRYPT_OID SymCryptSha512_224OidList[SYMCRYPT_SHA512_224_OID_COUNT]; + +#define SYMCRYPT_SHA512_256_OID_COUNT (2) +extern const SYMCRYPT_OID SymCryptSha512_256OidList[SYMCRYPT_SHA512_256_OID_COUNT]; + +#define SYMCRYPT_SHA3_224_OID_COUNT (2) +extern const SYMCRYPT_OID SymCryptSha3_224OidList[SYMCRYPT_SHA3_224_OID_COUNT]; + +#define SYMCRYPT_SHA3_256_OID_COUNT (2) 
+extern const SYMCRYPT_OID SymCryptSha3_256OidList[SYMCRYPT_SHA3_256_OID_COUNT]; + +#define SYMCRYPT_SHA3_384_OID_COUNT (2) +extern const SYMCRYPT_OID SymCryptSha3_384OidList[SYMCRYPT_SHA3_384_OID_COUNT]; + +#define SYMCRYPT_SHA3_512_OID_COUNT (2) +extern const SYMCRYPT_OID SymCryptSha3_512OidList[SYMCRYPT_SHA3_512_OID_COUNT]; + +#define SYMCRYPT_SHAKE128_OID_COUNT (2) +extern const SYMCRYPT_OID SymCryptShake128OidList[SYMCRYPT_SHAKE128_OID_COUNT]; + +#define SYMCRYPT_SHAKE256_OID_COUNT (2) +extern const SYMCRYPT_OID SymCryptShake256OidList[SYMCRYPT_SHAKE256_OID_COUNT]; + +typedef enum _SYMCRYPT_OID_LIST_ID +{ + SYMCRYPT_OID_LIST_ID_NULL = 0, + SYMCRYPT_OID_LIST_ID_MD5 = 1, + SYMCRYPT_OID_LIST_ID_SHA1 = 2, + SYMCRYPT_OID_LIST_ID_SHA224 = 3, + SYMCRYPT_OID_LIST_ID_SHA256 = 4, + SYMCRYPT_OID_LIST_ID_SHA384 = 5, + SYMCRYPT_OID_LIST_ID_SHA512 = 6, + SYMCRYPT_OID_LIST_ID_SHA512_224 = 7, + SYMCRYPT_OID_LIST_ID_SHA512_256 = 8, + SYMCRYPT_OID_LIST_ID_SHA3_224 = 9, + SYMCRYPT_OID_LIST_ID_SHA3_256 = 10, + SYMCRYPT_OID_LIST_ID_SHA3_384 = 11, + SYMCRYPT_OID_LIST_ID_SHA3_512 = 12, + SYMCRYPT_OID_LIST_ID_SHAKE128 = 13, + SYMCRYPT_OID_LIST_ID_SHAKE256 = 14 +} SYMCRYPT_OID_LIST_ID; + +PCSYMCRYPT_OID +SYMCRYPT_CALL +SymCryptGetOidList( SYMCRYPT_OID_LIST_ID oidId, _Out_opt_ SIZE_T* pCount ); +// +// Returns a pointer to the OID list for the specified OID list ID. If pCount is non-NULL, the +// pointed-to value will be set to the number of elements in the OID list. +// Returns NULL if the OID list ID is invalid. 
+// + +typedef union _SYMCRYPT_HASH_STATE +{ + SYMCRYPT_MD2_STATE md2State; + SYMCRYPT_MD4_STATE md4State; + SYMCRYPT_MD5_STATE md5State; + SYMCRYPT_SHA1_STATE sha1State; + SYMCRYPT_SHA224_STATE sha224State; + SYMCRYPT_SHA256_STATE sha256State; + SYMCRYPT_SHA384_STATE sha384State; + SYMCRYPT_SHA512_STATE sha512State; + SYMCRYPT_SHA512_224_STATE sha512_224State; + SYMCRYPT_SHA512_256_STATE sha512_256State; + SYMCRYPT_SHA3_224_STATE sha3_224State; + SYMCRYPT_SHA3_256_STATE sha3_256State; + SYMCRYPT_SHA3_384_STATE sha3_384State; + SYMCRYPT_SHA3_512_STATE sha3_512State; +} SYMCRYPT_HASH_STATE, *PSYMCRYPT_HASH_STATE; +typedef const SYMCRYPT_HASH_STATE *PCSYMCRYPT_HASH_STATE; + +#define SYMCRYPT_HASH_MAX_RESULT_SIZE SYMCRYPT_SHA512_RESULT_SIZE + +SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HASH; +SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_PARALLEL_HASH; + +typedef struct _SYMCRYPT_HASH SYMCRYPT_HASH, *PSYMCRYPT_HASH; +typedef const SYMCRYPT_HASH *PCSYMCRYPT_HASH; +typedef struct _SYMCRYPT_PARALLEL_HASH SYMCRYPT_PARALLEL_HASH, *PSYMCRYPT_PARALLEL_HASH; +typedef const SYMCRYPT_PARALLEL_HASH *PCSYMCRYPT_PARALLEL_HASH; + +typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_HASH_INIT_FUNC) ( PVOID pState ); +typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_HASH_APPEND_FUNC) ( PVOID pState, PCBYTE pbData, SIZE_T cbData ); +typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_HASH_RESULT_FUNC) ( PVOID pState, PVOID pbResult ); +typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_HASH_APPEND_BLOCKS_FUNC) ( PVOID pChain, PCBYTE pbData, SIZE_T cbData, SIZE_T * pcbRemaining ); +typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_HASH_STATE_COPY_FUNC) ( PCVOID pStateSrc, PVOID pStateDst ); + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HASH +{ + PSYMCRYPT_HASH_INIT_FUNC initFunc; + PSYMCRYPT_HASH_APPEND_FUNC appendFunc; + PSYMCRYPT_HASH_RESULT_FUNC resultFunc; + PSYMCRYPT_HASH_APPEND_BLOCKS_FUNC appendBlockFunc; + PSYMCRYPT_HASH_STATE_COPY_FUNC stateCopyFunc; + UINT32 stateSize; // sizeof( hash state ) + UINT32 resultSize; // size of hash result + UINT32 
inputBlockSize; + UINT32 chainOffset; // offset into state structure of the chaining state + UINT32 chainSize; // size of chaining state +} SYMCRYPT_HASH, *PSYMCRYPT_HASH; + + +// +// Parallel hashing +// + +#if SYMCRYPT_CPU_ARM +#define SYMCRYPT_PARALLEL_SHA256_MIN_PARALLELISM (3) +#define SYMCRYPT_PARALLEL_SHA256_MAX_PARALLELISM (4) +#else +#define SYMCRYPT_PARALLEL_SHA256_MIN_PARALLELISM (2) +#define SYMCRYPT_PARALLEL_SHA256_MAX_PARALLELISM (8) +#endif + +typedef enum _SYMCRYPT_HASH_OPERATION_TYPE { + SYMCRYPT_HASH_OPERATION_APPEND = 1, + SYMCRYPT_HASH_OPERATION_RESULT = 2, +} SYMCRYPT_HASH_OPERATION_TYPE; + +typedef struct _SYMCRYPT_PARALLEL_HASH_OPERATION SYMCRYPT_PARALLEL_HASH_OPERATION, *PSYMCRYPT_PARALLEL_HASH_OPERATION; +typedef const SYMCRYPT_PARALLEL_HASH_OPERATION *PCSYMRYPT_PARALLEL_HASH_OPERATION; // NOTE: spelled "PCSYMRYPT" (no 'C' before 'R'); the function-pointer typedefs in this header use the same spelling, so do not correct it in isolation. + +struct _SYMCRYPT_PARALLEL_HASH_OPERATION { + SIZE_T iHash; // index of hash object into the state array + SYMCRYPT_HASH_OPERATION_TYPE hashOperation; // operation to be performed (one of the SYMCRYPT_HASH_OPERATION_* values) + _Field_size_( cbBuffer ) PBYTE pbBuffer; // data to be hashed, or result buffer + SIZE_T cbBuffer; // size of the pbBuffer buffer, in bytes. + PSYMCRYPT_PARALLEL_HASH_OPERATION next; // internal scratch space; do not use. +}; + + +SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_PARALLEL_HASH_SCRATCH_OPERATION; // as yet unspecified struct +typedef struct _SYMCRYPT_PARALLEL_HASH_SCRATCH_OPERATION + SYMCRYPT_PARALLEL_HASH_SCRATCH_OPERATION, *PSYMCRYPT_PARALLEL_HASH_SCRATCH_OPERATION; + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_PARALLEL_HASH_SCRATCH_STATE { + PVOID hashState; // the actual hash state + BYTE processingState; + BYTE bytesAlreadyProcessed; // of the next Append operation + UINT64 bytes; // # bytes left to process on this state + PSYMCRYPT_PARALLEL_HASH_OPERATION next; // next operation to be performed.
+ PCBYTE pbData; // data/size of ongoing append operation; this op has already been removed from the next linked list + SIZE_T cbData; +}SYMCRYPT_PARALLEL_HASH_SCRATCH_STATE, *PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE; + + +// +// The scratch space used by parallel SHA-256 consists of three regions: +// - an array of SYMCRYPT_PARALLEL_HASH_SCRATCH_STATE structures, aligned to SYMCRYPT_ALIGN_VALUE. +// - the work array, an array of pointers to SYMCRYPT_PARALLEL_HASH_SCRATCH_STATEs. +// - an array of 4 + 8 + 64 SIMD vector elements, aligned to the size of those elements. +// +// +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 +#define SYMCRYPT_SIMD_ELEMENT_SIZE 32 +#elif SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 +#define SYMCRYPT_SIMD_ELEMENT_SIZE 16 +#elif SYMCRYPT_CPU_UNKNOWN +#define SYMCRYPT_SIMD_ELEMENT_SIZE 0 +#else +#error Unknown CPU +#endif + +#define SYMCRYPT_PARALLEL_SHA256_FIXED_SCRATCH ( (4 + 8 + 64) * SYMCRYPT_SIMD_ELEMENT_SIZE + SYMCRYPT_SIMD_ELEMENT_SIZE - 1 + SYMCRYPT_ALIGN_VALUE - 1 ) +#define SYMCRYPT_PARALLEL_SHA384_FIXED_SCRATCH ( (4 + 8 + 80) * SYMCRYPT_SIMD_ELEMENT_SIZE + SYMCRYPT_SIMD_ELEMENT_SIZE - 1 + SYMCRYPT_ALIGN_VALUE - 1 ) +#define SYMCRYPT_PARALLEL_SHA512_FIXED_SCRATCH ( (4 + 8 + 80) * SYMCRYPT_SIMD_ELEMENT_SIZE + SYMCRYPT_SIMD_ELEMENT_SIZE - 1 + SYMCRYPT_ALIGN_VALUE - 1 ) +#define SYMCRYPT_PARALLEL_HASH_PER_STATE_SCRATCH (sizeof( SYMCRYPT_PARALLEL_HASH_SCRATCH_STATE ) + sizeof( PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE ) ) + +SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_PARALLEL_HASH; +typedef struct _SYMCRYPT_PARALLEL_HASH SYMCRYPT_PARALLEL_HASH, *PSYMCRYPT_PARALLEL_HASH; +typedef const SYMCRYPT_PARALLEL_HASH *PCSYMCRYPT_PARALLEL_HASH; + +typedef BOOLEAN (SYMCRYPT_CALL * PSYMCRYPT_PARALLEL_HASH_RESULT_FUNC) (PCSYMCRYPT_PARALLEL_HASH pParHash, PSYMCRYPT_COMMON_HASH_STATE pState, PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pScratch, BOOLEAN *pRes ); +typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_PARALLEL_HASH_RESULT_DONE_FUNC ) (PCSYMCRYPT_PARALLEL_HASH pParHash, 
PSYMCRYPT_COMMON_HASH_STATE pState, PCSYMRYPT_PARALLEL_HASH_OPERATION pOp); +typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_PARALLEL_APPEND_FUNC) ( + _Inout_updates_( nPar ) PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pWork, + SIZE_T nPar, + SIZE_T nBytes, + _Out_writes_( cbSimdScratch ) PBYTE pbSimdScratch, + SIZE_T cbSimdScratch ); + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_PARALLEL_HASH +{ + PCSYMCRYPT_HASH pHash; + UINT32 parScratchFixed; // fixed scratch size for parallel hash + PSYMCRYPT_PARALLEL_HASH_RESULT_FUNC parResult1Func; + PSYMCRYPT_PARALLEL_HASH_RESULT_FUNC parResult2Func; + PSYMCRYPT_PARALLEL_HASH_RESULT_DONE_FUNC parResultDoneFunc; + + PSYMCRYPT_PARALLEL_APPEND_FUNC parAppendFunc; +} SYMCRYPT_PARALLEL_HASH, *PSYMCRYPT_PARALLEL_HASH; + + +//====================================================================================================== +// MAC +// + + +// +// SYMCRYPT_HMAC_MD5_EXPANDED_KEY +// +// Data structure to store an expanded key for HMAC-MD5. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_MD5_EXPANDED_KEY +{ + SYMCRYPT_MD5_CHAINING_STATE innerState; + SYMCRYPT_MD5_CHAINING_STATE outerState; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_HMAC_MD5_EXPANDED_KEY, *PSYMCRYPT_HMAC_MD5_EXPANDED_KEY; +typedef const SYMCRYPT_HMAC_MD5_EXPANDED_KEY * PCSYMCRYPT_HMAC_MD5_EXPANDED_KEY; + +// +// SYMCRYPT_HMAC_MD5_STATE +// +// Data structure that encodes an ongoing HMAC-MD5 computation. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_MD5_STATE +{ + SYMCRYPT_MD5_STATE hash; + PCSYMCRYPT_HMAC_MD5_EXPANDED_KEY pKey; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_HMAC_MD5_STATE, *PSYMCRYPT_HMAC_MD5_STATE; +typedef const SYMCRYPT_HMAC_MD5_STATE *PCSYMCRYPT_HMAC_MD5_STATE; + + +// +// SYMCRYPT_HMAC_SHA1_EXPANDED_KEY +// +// Data structure to store an expanded key for HMAC-SHA1. 
+// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA1_EXPANDED_KEY +{ + SYMCRYPT_SHA1_CHAINING_STATE innerState; + SYMCRYPT_SHA1_CHAINING_STATE outerState; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_HMAC_SHA1_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA1_EXPANDED_KEY; +typedef const SYMCRYPT_HMAC_SHA1_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA1_EXPANDED_KEY; + +// +// SYMCRYPT_HMAC_SHA1_STATE +// +// Data structure that encodes an ongoing HMAC-SHA1 computation. Holds a SYMCRYPT_SHA1_STATE and a pointer to the const expanded key. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA1_STATE +{ + SYMCRYPT_SHA1_STATE hash; + PCSYMCRYPT_HMAC_SHA1_EXPANDED_KEY pKey; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_HMAC_SHA1_STATE, *PSYMCRYPT_HMAC_SHA1_STATE; +typedef const SYMCRYPT_HMAC_SHA1_STATE *PCSYMCRYPT_HMAC_SHA1_STATE; + + +// +// SYMCRYPT_HMAC_SHA224_EXPANDED_KEY +// +// Data structure to store an expanded key for HMAC-SHA224. Note: both chaining states are declared as SYMCRYPT_SHA256_CHAINING_STATE; no SHA224-specific chaining-state type is used here. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA224_EXPANDED_KEY +{ + SYMCRYPT_SHA256_CHAINING_STATE innerState; + SYMCRYPT_SHA256_CHAINING_STATE outerState; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_HMAC_SHA224_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA224_EXPANDED_KEY; +typedef const SYMCRYPT_HMAC_SHA224_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA224_EXPANDED_KEY; + +// +// SYMCRYPT_HMAC_SHA224_STATE +// +// Data structure that encodes an ongoing HMAC-SHA224 computation. Holds a SYMCRYPT_SHA224_STATE and a pointer to the const expanded key. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA224_STATE +{ + SYMCRYPT_SHA224_STATE hash; + PCSYMCRYPT_HMAC_SHA224_EXPANDED_KEY pKey; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_HMAC_SHA224_STATE, *PSYMCRYPT_HMAC_SHA224_STATE; +typedef const SYMCRYPT_HMAC_SHA224_STATE *PCSYMCRYPT_HMAC_SHA224_STATE; + + +// +// SYMCRYPT_HMAC_SHA256_EXPANDED_KEY +// +// Data structure to store an expanded key for HMAC-SHA256.
+// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA256_EXPANDED_KEY +{ + SYMCRYPT_SHA256_CHAINING_STATE innerState; + SYMCRYPT_SHA256_CHAINING_STATE outerState; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_HMAC_SHA256_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA256_EXPANDED_KEY; +typedef const SYMCRYPT_HMAC_SHA256_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA256_EXPANDED_KEY; + +// +// SYMCRYPT_HMAC_SHA256_STATE +// +// Data structure that encodes an ongoing HMAC-SHA256 computation. Holds a SYMCRYPT_SHA256_STATE and a pointer to the const expanded key. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA256_STATE +{ + SYMCRYPT_SHA256_STATE hash; + PCSYMCRYPT_HMAC_SHA256_EXPANDED_KEY pKey; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_HMAC_SHA256_STATE, *PSYMCRYPT_HMAC_SHA256_STATE; +typedef const SYMCRYPT_HMAC_SHA256_STATE *PCSYMCRYPT_HMAC_SHA256_STATE; + + +// +// SYMCRYPT_HMAC_SHA384_EXPANDED_KEY +// +// Data structure to store an expanded key for HMAC-SHA384. Note: both chaining states are declared as SYMCRYPT_SHA512_CHAINING_STATE; no SHA384-specific chaining-state type is used here. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA384_EXPANDED_KEY +{ + SYMCRYPT_SHA512_CHAINING_STATE innerState; + SYMCRYPT_SHA512_CHAINING_STATE outerState; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_HMAC_SHA384_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA384_EXPANDED_KEY; +typedef const SYMCRYPT_HMAC_SHA384_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA384_EXPANDED_KEY; + +// +// SYMCRYPT_HMAC_SHA384_STATE +// +// Data structure that encodes an ongoing HMAC-SHA384 computation. Holds a SYMCRYPT_SHA384_STATE and a pointer to the const expanded key. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA384_STATE +{ + SYMCRYPT_SHA384_STATE hash; + PCSYMCRYPT_HMAC_SHA384_EXPANDED_KEY pKey; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_HMAC_SHA384_STATE, *PSYMCRYPT_HMAC_SHA384_STATE; +typedef const SYMCRYPT_HMAC_SHA384_STATE *PCSYMCRYPT_HMAC_SHA384_STATE; + +// +// SYMCRYPT_HMAC_SHA512_EXPANDED_KEY +// +// Data structure to store an expanded key for HMAC-SHA512.
+// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA512_EXPANDED_KEY +{ + SYMCRYPT_SHA512_CHAINING_STATE innerState; + SYMCRYPT_SHA512_CHAINING_STATE outerState; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_HMAC_SHA512_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA512_EXPANDED_KEY; +typedef const SYMCRYPT_HMAC_SHA512_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA512_EXPANDED_KEY; + +// +// SYMCRYPT_HMAC_SHA512_STATE +// +// Data structure that encodes an ongoing HMAC-SHA512 computation. Holds a SYMCRYPT_SHA512_STATE and a pointer to the const expanded key. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA512_STATE +{ + SYMCRYPT_SHA512_STATE hash; + PCSYMCRYPT_HMAC_SHA512_EXPANDED_KEY pKey; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_HMAC_SHA512_STATE, *PSYMCRYPT_HMAC_SHA512_STATE; +typedef const SYMCRYPT_HMAC_SHA512_STATE *PCSYMCRYPT_HMAC_SHA512_STATE; + +// +// SYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY +// +// Data structure to store an expanded key for HMAC-SHA512_224. Note: both chaining states are declared as SYMCRYPT_SHA512_CHAINING_STATE; no SHA512_224-specific chaining-state type is used here. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY +{ + SYMCRYPT_SHA512_CHAINING_STATE innerState; + SYMCRYPT_SHA512_CHAINING_STATE outerState; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY; +typedef const SYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY; + +// +// SYMCRYPT_HMAC_SHA512_224_STATE +// +// Data structure that encodes an ongoing HMAC-SHA512_224 computation. Holds a SYMCRYPT_SHA512_224_STATE and a pointer to the const expanded key. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA512_224_STATE +{ + SYMCRYPT_SHA512_224_STATE hash; + PCSYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY pKey; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_HMAC_SHA512_224_STATE, *PSYMCRYPT_HMAC_SHA512_224_STATE; +typedef const SYMCRYPT_HMAC_SHA512_224_STATE *PCSYMCRYPT_HMAC_SHA512_224_STATE; + +// +// SYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY +// +// Data structure to store an expanded key for HMAC-SHA512_256.
+// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY +{ + SYMCRYPT_SHA512_CHAINING_STATE innerState; + SYMCRYPT_SHA512_CHAINING_STATE outerState; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY; +typedef const SYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY; + +// +// SYMCRYPT_HMAC_SHA512_256_STATE +// +// Data structure that encodes an ongoing HMAC-SHA512_256 computation. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA512_256_STATE +{ + SYMCRYPT_SHA512_256_STATE hash; + PCSYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY pKey; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_HMAC_SHA512_256_STATE, *PSYMCRYPT_HMAC_SHA512_256_STATE; +typedef const SYMCRYPT_HMAC_SHA512_256_STATE *PCSYMCRYPT_HMAC_SHA512_256_STATE; + +// +// SYMCRYPT_HMAC_EXPANDED_KEY +// +// Generic HMAC Expanded Key data structure +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_EXPANDED_KEY +{ + PCSYMCRYPT_HASH pHash; + SYMCRYPT_HASH_STATE innerState; + SYMCRYPT_HASH_STATE outerState; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_HMAC_EXPANDED_KEY, * PSYMCRYPT_HMAC_EXPANDED_KEY; +typedef const SYMCRYPT_HMAC_EXPANDED_KEY* PCSYMCRYPT_HMAC_EXPANDED_KEY; + +// +// SYMCRYPT_HMAC_STATE +// +// Generic HMAC data structure +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_STATE +{ + PCSYMCRYPT_HMAC_EXPANDED_KEY pKey; + SYMCRYPT_HASH_STATE hash; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_HMAC_STATE, * PSYMCRYPT_HMAC_STATE; +typedef const SYMCRYPT_HMAC_STATE* PCSYMCRYPT_HMAC_STATE; + +// +// SYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY +// +// Data structure to store an expanded key for HMAC-SHA3-224 +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY +{ + SYMCRYPT_HMAC_EXPANDED_KEY generic; + +} SYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY; +typedef const SYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY; + +// +// SYMCRYPT_HMAC_SHA3_224_STATE +// +// Data structure that 
encodes an ongoing HMAC-SHA3-224 computation. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA3_224_STATE +{ + SYMCRYPT_HMAC_STATE generic; + +} SYMCRYPT_HMAC_SHA3_224_STATE, *PSYMCRYPT_HMAC_SHA3_224_STATE; +typedef const SYMCRYPT_HMAC_SHA3_224_STATE *PCSYMCRYPT_HMAC_SHA3_224_STATE; + +// +// SYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY +// +// Data structure to store an expanded key for HMAC-SHA3-256 +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY +{ + SYMCRYPT_HMAC_EXPANDED_KEY generic; + +} SYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY; +typedef const SYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY; + +// +// SYMCRYPT_HMAC_SHA3_256_STATE +// +// Data structure that encodes an ongoing HMAC-SHA3-256 computation. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA3_256_STATE +{ + SYMCRYPT_HMAC_STATE generic; + +} SYMCRYPT_HMAC_SHA3_256_STATE, *PSYMCRYPT_HMAC_SHA3_256_STATE; +typedef const SYMCRYPT_HMAC_SHA3_256_STATE *PCSYMCRYPT_HMAC_SHA3_256_STATE; + +// +// SYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY +// +// Data structure to store an expanded key for HMAC-SHA3-384 +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY +{ + SYMCRYPT_HMAC_EXPANDED_KEY generic; + +} SYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY; +typedef const SYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY; + +// +// SYMCRYPT_HMAC_SHA3_384_STATE +// +// Data structure that encodes an ongoing HMAC-SHA3-384 computation. 
+// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA3_384_STATE +{ + SYMCRYPT_HMAC_STATE generic; + +} SYMCRYPT_HMAC_SHA3_384_STATE, *PSYMCRYPT_HMAC_SHA3_384_STATE; +typedef const SYMCRYPT_HMAC_SHA3_384_STATE *PCSYMCRYPT_HMAC_SHA3_384_STATE; + +// +// SYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY +// +// Data structure to store an expanded key for HMAC-SHA3-512 +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY +{ + SYMCRYPT_HMAC_EXPANDED_KEY generic; + +} SYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY, *PSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY; +typedef const SYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY * PCSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY; + +// +// SYMCRYPT_HMAC_SHA3_512_STATE +// +// Data structure that encodes an ongoing HMAC-SHA3-512 computation. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_HMAC_SHA3_512_STATE +{ + SYMCRYPT_HMAC_STATE generic; + +} SYMCRYPT_HMAC_SHA3_512_STATE, *PSYMCRYPT_HMAC_SHA3_512_STATE; +typedef const SYMCRYPT_HMAC_SHA3_512_STATE *PCSYMCRYPT_HMAC_SHA3_512_STATE; + +// +// SYMCRYPT_AES_EXPANDED_KEY +// +// Expanded key for AES operations. +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_AES_EXPANDED_KEY { + SYMCRYPT_ALIGN BYTE RoundKey[29][4][4]; + // Round keys, first the encryption round keys in encryption order, + // followed by the decryption round keys in decryption order. + // The first decryption round key is the last encryption round key. + // AES-256 has 14 rounds and thus 15 round keys for encryption and 15 + // for decryption. As they share one round key, we need room for 29. + BYTE (*lastEncRoundKey)[4][4]; // Pointer to last encryption round key + // also the first round key for decryption + BYTE (*lastDecRoundKey)[4][4]; // Pointer to last decryption round key. + + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_AES_EXPANDED_KEY, *PSYMCRYPT_AES_EXPANDED_KEY; +typedef const SYMCRYPT_AES_EXPANDED_KEY * PCSYMCRYPT_AES_EXPANDED_KEY; + +// +// AES-CMAC +// +// Note: SYMCRYPT_AES_BLOCK_SIZE is not yet defined, so we use +// literal constants instead. 
+// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_AES_CMAC_EXPANDED_KEY +{ + SYMCRYPT_AES_EXPANDED_KEY aesKey; + BYTE K1[16]; + BYTE K2[16]; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_AES_CMAC_EXPANDED_KEY, *PSYMCRYPT_AES_CMAC_EXPANDED_KEY; +typedef const SYMCRYPT_AES_CMAC_EXPANDED_KEY * PCSYMCRYPT_AES_CMAC_EXPANDED_KEY; + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_AES_CMAC_STATE +{ + BYTE chain[16]; + BYTE buf[16]; + SIZE_T bytesInBuf; + PCSYMCRYPT_AES_CMAC_EXPANDED_KEY pKey; + + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_AES_CMAC_STATE, *PSYMCRYPT_AES_CMAC_STATE; +typedef const SYMCRYPT_AES_CMAC_STATE * PCSYMCRYPT_AES_CMAC_STATE; + +// +// POLY1305 +// + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_POLY1305_STATE +{ + UINT32 r[4]; // R := \sum 2^{32*i} r[i]. R is already clamped. + UINT32 s[4]; // S := \sum 2^{32*i} s[i] + UINT32 a[5]; // Accumulator := \sum 2^{32*i} a[i], a[4] <= approx 8 + SIZE_T bytesInBuffer; + BYTE buf[16]; // Partial block buffer + + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_POLY1305_STATE, *PSYMCRYPT_POLY1305_STATE; + +// +// XTS-AES +// + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_XTS_AES_EXPANDED_KEY +{ + SYMCRYPT_AES_EXPANDED_KEY key1; + SYMCRYPT_AES_EXPANDED_KEY key2; +} SYMCRYPT_XTS_AES_EXPANDED_KEY, *PSYMCRYPT_XTS_AES_EXPANDED_KEY; +typedef const SYMCRYPT_XTS_AES_EXPANDED_KEY * PCSYMCRYPT_XTS_AES_EXPANDED_KEY; + + +//----------------------------------------------------------------- +// MAC description table +// Below are the typedefs for the MAC description table type. +// Callers can use this to specify the MAC algorithm they want to use. +// + +#define SYMCRYPT_MAC_MAX_RESULT_SIZE SYMCRYPT_HMAC_SHA512_RESULT_SIZE + +typedef union _SYMCRYPT_MAC_STATE +{ + SYMCRYPT_HMAC_MD5_STATE md5State; + SYMCRYPT_HMAC_SHA1_STATE sha1State; + SYMCRYPT_HMAC_SHA224_STATE sha224State; + SYMCRYPT_HMAC_SHA256_STATE sha256State; + SYMCRYPT_HMAC_SHA384_STATE sha384State; + SYMCRYPT_HMAC_SHA512_STATE sha512State; + SYMCRYPT_HMAC_SHA512_224_STATE sha512_224State; + SYMCRYPT_HMAC_SHA512_256_STATE
sha512_256State; + SYMCRYPT_HMAC_SHA3_224_STATE sha3_224State; + SYMCRYPT_HMAC_SHA3_256_STATE sha3_256State; + SYMCRYPT_HMAC_SHA3_384_STATE sha3_384State; + SYMCRYPT_HMAC_SHA3_512_STATE sha3_512State; + SYMCRYPT_AES_CMAC_STATE aescmacState; + SYMCRYPT_KMAC128_STATE kmac128State; + SYMCRYPT_KMAC256_STATE kmac256State; +} SYMCRYPT_MAC_STATE, *PSYMCRYPT_MAC_STATE; +typedef const SYMCRYPT_MAC_STATE *PCSYMCRYPT_MAC_STATE; + +typedef union _SYMCRYPT_MAC_EXPANDED_KEY +{ + SYMCRYPT_HMAC_MD5_EXPANDED_KEY md5Key; + SYMCRYPT_HMAC_SHA1_EXPANDED_KEY sha1Key; + SYMCRYPT_HMAC_SHA224_EXPANDED_KEY sha224Key; + SYMCRYPT_HMAC_SHA256_EXPANDED_KEY sha256Key; + SYMCRYPT_HMAC_SHA384_EXPANDED_KEY sha384Key; + SYMCRYPT_HMAC_SHA512_EXPANDED_KEY sha512Key; + SYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY sha512_224Key; + SYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY sha512_256Key; + SYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY sha3_224Key; + SYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY sha3_256Key; + SYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY sha3_384Key; + SYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY sha3_512Key; + SYMCRYPT_AES_CMAC_EXPANDED_KEY aescmacKey; + SYMCRYPT_KMAC128_EXPANDED_KEY kmac128Key; + SYMCRYPT_KMAC256_EXPANDED_KEY kmac256Key; +} SYMCRYPT_MAC_EXPANDED_KEY, *PSYMCRYPT_MAC_EXPANDED_KEY; +typedef const SYMCRYPT_MAC_EXPANDED_KEY *PCSYMCRYPT_MAC_EXPANDED_KEY; + +typedef SYMCRYPT_ERROR (SYMCRYPT_CALL * PSYMCRYPT_MAC_EXPAND_KEY) + ( PVOID pExpandedKey, PCBYTE pbKey, SIZE_T cbKey ); +typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_MAC_INIT) ( PVOID pState, PCVOID pExpandedKey ); +typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_MAC_APPEND)( PVOID pState, PCBYTE pbData, SIZE_T cbData ); +typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_MAC_RESULT) ( PVOID pState, PVOID pbResult ); +typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_MAC_RESULT_EX) ( PVOID pState, PVOID pbResult, SIZE_T cbResult ); + +typedef struct _SYMCRYPT_MAC +{ + PSYMCRYPT_MAC_EXPAND_KEY expandKeyFunc; + PSYMCRYPT_MAC_INIT initFunc; + PSYMCRYPT_MAC_APPEND appendFunc; + PSYMCRYPT_MAC_RESULT 
resultFunc; + SIZE_T expandedKeySize; + SIZE_T stateSize; + SIZE_T resultSize; + const PCSYMCRYPT_HASH * ppHashAlgorithm; // NULL for MACs not based on hashes + UINT32 outerChainingStateOffset; // Offset into expanded key of outer chaining state; 0 for non-HMAC algorithms +} SYMCRYPT_MAC, *PSYMCRYPT_MAC; +typedef const SYMCRYPT_MAC *PCSYMCRYPT_MAC; + + + +// +// 3DES +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_3DES_EXPANDED_KEY { + UINT32 roundKey[3][16][2]; // 3 keys, 16 rounds, 2 UINT32s/round + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_3DES_EXPANDED_KEY, *PSYMCRYPT_3DES_EXPANDED_KEY; +typedef const SYMCRYPT_3DES_EXPANDED_KEY * PCSYMCRYPT_3DES_EXPANDED_KEY; + +// +// DES +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_DES_EXPANDED_KEY { + SYMCRYPT_3DES_EXPANDED_KEY threeDes; +} SYMCRYPT_DES_EXPANDED_KEY, *PSYMCRYPT_DES_EXPANDED_KEY; +typedef const SYMCRYPT_DES_EXPANDED_KEY * PCSYMCRYPT_DES_EXPANDED_KEY; + +// +// DESX +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_DESX_EXPANDED_KEY { + SYMCRYPT_DES_EXPANDED_KEY desKey; + BYTE inputWhitening[8]; + BYTE outputWhitening[8]; +} SYMCRYPT_DESX_EXPANDED_KEY, *PSYMCRYPT_DESX_EXPANDED_KEY; +typedef const SYMCRYPT_DESX_EXPANDED_KEY * PCSYMCRYPT_DESX_EXPANDED_KEY; + +// +// RC2 +// +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_RC2_EXPANDED_KEY { + UINT16 K[64]; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_RC2_EXPANDED_KEY, *PSYMCRYPT_RC2_EXPANDED_KEY; +typedef const SYMCRYPT_RC2_EXPANDED_KEY * PCSYMCRYPT_RC2_EXPANDED_KEY; + + +// +// CCM states for incremental computations +// +#define SYMCRYPT_CCM_BLOCK_SIZE (16) + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_CCM_STATE { + PCSYMCRYPT_BLOCKCIPHER pBlockCipher; + PCVOID pExpandedKey; + UINT64 cbData; // exact length of data + SIZE_T cbTag; + SIZE_T cbNonce; + SIZE_T cbCounter; // # bytes in counter field + UINT64 bytesProcessed; // data bytes processed so far + _Field_range_( 0, SYMCRYPT_CCM_BLOCK_SIZE-1 ) SIZE_T bytesInMacBlock; + SYMCRYPT_ALIGN BYTE counterBlock[SYMCRYPT_CCM_BLOCK_SIZE]; // Current 
counter block value + SYMCRYPT_ALIGN BYTE macBlock[SYMCRYPT_CCM_BLOCK_SIZE]; // Current state of the CBC-MAC part of CCM + SYMCRYPT_ALIGN BYTE keystreamBlock[SYMCRYPT_CCM_BLOCK_SIZE]; // Remaining key stream if partial block has been processed + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_CCM_STATE, *PSYMCRYPT_CCM_STATE; + + +// +// GHash & GCM +// + +typedef union _SYMCRYPT_GCM_SUPPORTED_BLOCKCIPHER_KEYS +{ + SYMCRYPT_AES_EXPANDED_KEY aes; +} SYMCRYPT_GCM_SUPPORTED_BLOCKCIPHER_KEYS; + +#define SYMCRYPT_GCM_BLOCKCIPHER_KEY_SIZE sizeof( union _SYMCRYPT_GCM_SUPPORTED_BLOCKCIPHER_KEYS ) + +#define SYMCRYPT_GF128_FIELD_SIZE (128) +#define SYMCRYPT_GF128_BLOCK_SIZE (16) // # bytes in a field element/block +#define SYMCRYPT_GCM_BLOCK_SIZE (16) +#define SYMCRYPT_GCM_MAX_KEY_SIZE (32) + + +#define SYMCRYPT_GCM_MAX_DATA_SIZE (((UINT64)1 << 36) - 32) + +#define SYMCRYPT_GCM_BLOCK_MOD_MASK (SYMCRYPT_GCM_BLOCK_SIZE - 1) +#define SYMCRYPT_GCM_BLOCK_ROUND_MASK (~SYMCRYPT_GCM_BLOCK_MOD_MASK) + +#if SYMCRYPT_CPU_X86 + // + // x86 needs extra alignment of the GHASH expanded key to support + // aligned (fast) XMM access. AMD64 has enough natural alignment to + // achieve this. + // + #define SYMCRYPT_GHASH_EXTRA_KEY_ALIGNMENT +#endif + +#define SYMCRYPT_GHASH_ALLOW_XMM (SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64) +#define SYMCRYPT_GHASH_ALLOW_NEON (SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64) + + +#if SYMCRYPT_CPU_ARM +#include <arm_neon.h> +#if SYMCRYPT_GNUC || defined(__clang__) + #define __n128 uint32x4_t + #define __n64 uint64x1_t +#endif + +#elif SYMCRYPT_CPU_ARM64 + + #if SYMCRYPT_MS_VC && !defined(__clang__) + #include <arm64_neon.h> + + // See section 6.7.8 of the C standard for details on this initializer usage. 
+ #define SYMCRYPT_SET_N128_U64(d0, d1) \ + ((__n128) {.n128_u64 = {d0, d1}}) + #define SYMCRYPT_SET_N64_U64(d0) \ + ((__n64) {.n64_u64 = {d0}}) + #define SYMCRYPT_SET_N128_U8(b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15) \ + ((__n128) {.n128_u8 = {b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15}}) + #else + #include <arm_neon.h> + + #define __n128 uint8x16_t + #define __n64 uint8x8_t + + #define SYMCRYPT_SET_N128_U64(d0, d1) \ + ((__n128) ((uint64x2_t) {d0, d1})) + #define SYMCRYPT_SET_N64_U64(d0) \ + ((__n64) ((uint64x1_t) {d0})) + #define SYMCRYPT_SET_N128_U8(b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15) \ + ((__n128) ((uint8x16_t) {b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15})) + + #define vmullq_p64( a, b ) ((__n128) vmull_p64(vgetq_lane_p64((poly64x2_t)a, 0), vgetq_lane_p64((poly64x2_t)b, 0))) + #define vmull_p64( a, b ) ((__n128) vmull_p64( (poly64_t)a, (poly64_t)b )) + #define vmull_high_p64( a, b ) ((__n128) vmull_high_p64( (poly64x2_t)a, (poly64x2_t)b )) + #endif + +#endif + +// +// All platforms use the same in-memory representation: +// elements of GF(2^128) stored as two 64-bit integers which are best +// interpreted as a single 128-bit integer, least significant half first. +// Note: the actual GF(2^128) bit order is reversed in the standard +// for some reason; the +// polynomial \sum b_i x^i is represented by the integer \sum b_i 2^{127-i}. +// On x86/amd64 the same in-memory byte structure is also accessed as an +// __m128i, which works because the UINT64s, the UINT32s, and the __m128i all +// use the LSB-first convention. +// +typedef SYMCRYPT_ALIGN_UNION _SYMCRYPT_GF128_ELEMENT { + UINT64 ull[2]; +#if SYMCRYPT_GHASH_ALLOW_XMM + // + // The XMM code accesses this both as UINT32[] and as __m128i. + // This is safe as XMM code only runs on little endian machines so the + // ordering is known.
+ // + __m128i m128i; + UINT32 ul[4]; +#endif +#if SYMCRYPT_GHASH_ALLOW_NEON + __n128 n128; + UINT32 ul[4]; +#endif +} SYMCRYPT_GF128_ELEMENT, *PSYMCRYPT_GF128_ELEMENT; +typedef const SYMCRYPT_GF128_ELEMENT * PCSYMCRYPT_GF128_ELEMENT; + + + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_GHASH_EXPANDED_KEY { +#if defined( SYMCRYPT_GHASH_EXTRA_KEY_ALIGNMENT ) + UINT32 tableOffset; + BYTE tableSpace[ (SYMCRYPT_GF128_FIELD_SIZE + 1) * sizeof( SYMCRYPT_GF128_ELEMENT ) ]; +#else + SYMCRYPT_GF128_ELEMENT table[ SYMCRYPT_GF128_FIELD_SIZE ]; +#endif +} SYMCRYPT_GHASH_EXPANDED_KEY, *PSYMCRYPT_GHASH_EXPANDED_KEY; +typedef const SYMCRYPT_GHASH_EXPANDED_KEY * PCSYMCRYPT_GHASH_EXPANDED_KEY; + + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_GCM_EXPANDED_KEY { + SYMCRYPT_GHASH_EXPANDED_KEY ghashKey; + PCSYMCRYPT_BLOCKCIPHER pBlockCipher; + SYMCRYPT_GCM_SUPPORTED_BLOCKCIPHER_KEYS blockcipherKey; + SIZE_T cbKey; + BYTE abKey[SYMCRYPT_GCM_MAX_KEY_SIZE]; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_GCM_EXPANDED_KEY, * PSYMCRYPT_GCM_EXPANDED_KEY; +typedef const SYMCRYPT_GCM_EXPANDED_KEY * PCSYMCRYPT_GCM_EXPANDED_KEY; + + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_GCM_STATE { + PCSYMCRYPT_GCM_EXPANDED_KEY pKey; + UINT64 cbData; // Number of data bytes + UINT64 cbAuthData; // Number of AAD bytes + _Field_range_( 0, SYMCRYPT_GCM_BLOCK_SIZE-1 ) SIZE_T bytesInMacBlock; + SYMCRYPT_GF128_ELEMENT ghashState; + SYMCRYPT_ALIGN BYTE counterBlock[SYMCRYPT_GCM_BLOCK_SIZE]; + SYMCRYPT_ALIGN BYTE macBlock[SYMCRYPT_GCM_BLOCK_SIZE]; + SYMCRYPT_ALIGN BYTE keystreamBlock[SYMCRYPT_GCM_BLOCK_SIZE]; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_GCM_STATE, * PSYMCRYPT_GCM_STATE; +typedef const SYMCRYPT_GCM_STATE * PCSYMCRYPT_GCM_STATE; + + +// +// Block ciphers +// +#define SYMCRYPT_MAX_BLOCK_SIZE (32) // max block length of a block cipher. 
+ +typedef SYMCRYPT_ERROR( SYMCRYPT_CALL * PSYMCRYPT_BLOCKCIPHER_EXPAND_KEY ) +(PVOID pExpandedKey, PCBYTE pbKey, SIZE_T cbKey); +typedef VOID( SYMCRYPT_CALL * PSYMCRYPT_BLOCKCIPHER_CRYPT ) (PCVOID pExpandedKey, PCBYTE pbSrc, PBYTE pbDst); +typedef VOID( SYMCRYPT_CALL * PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB ) (PCVOID pExpandedKey, PCBYTE pbSrc, PBYTE pbDst, SIZE_T cbData); +typedef VOID( SYMCRYPT_CALL * PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE ) (PCVOID pExpandedKey, PBYTE pbChainingValue, PCBYTE pbSrc, PBYTE pbDst, SIZE_T cbData); +typedef VOID( SYMCRYPT_CALL * PSYMCRYPT_BLOCKCIPHER_MAC_MODE ) (PCVOID pExpandedKey, PBYTE pbChainingValue, PCBYTE pbSrc, SIZE_T cbData); +typedef VOID( SYMCRYPT_CALL * PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE ) (PVOID pState, PCBYTE pbSrc, PBYTE pbDst, SIZE_T cbData); + +struct _SYMCRYPT_BLOCKCIPHER { + PSYMCRYPT_BLOCKCIPHER_EXPAND_KEY expandKeyFunc; // mandatory + PSYMCRYPT_BLOCKCIPHER_CRYPT encryptFunc; // mandatory + PSYMCRYPT_BLOCKCIPHER_CRYPT decryptFunc; // mandatory + PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB ecbEncryptFunc; // NULL if no optimized version available + PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB ecbDecryptFunc; // NULL if no optimized version available + PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE cbcEncryptFunc; // NULL if no optimized version available + PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE cbcDecryptFunc; // NULL if no optimized version available + PSYMCRYPT_BLOCKCIPHER_MAC_MODE cbcMacFunc; // NULL if no optimized version available + PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE ctrMsb64Func; // NULL if no optimized version available + PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmEncryptPartFunc; // NULL if no optimized version available + PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmDecryptPartFunc; // NULL if no optimized version available + _Field_range_( 1, SYMCRYPT_MAX_BLOCK_SIZE ) SIZE_T blockSize; // = SYMCRYPT_XXX_BLOCK_SIZE, power of 2, 1 <= value <= 32. 
+ SIZE_T expandedKeySize; // = sizeof( SYMCRYPT_XXX_EXPANDED_KEY ) +}; + + + +// +// Session structs +// + +#define SYMCRYPT_FLAG_SESSION_ENCRYPT (0x1) + +// +// SYMCRYPT_SESSION tracks the Nonces being used in a session. It is used differently depending on +// whether the session is an Encryption session or a Decryption session. +// +// In Encryption sessions, SYMCRYPT_SESSION tracks the Nonce which was used in the most recent +// attempted encryption in the session. +// messageNumber is atomically incremented by each encryption call, and the encryption method uses +// the messageNumber value that is the _result_ of the increment. +// +// In Decryption sessions, SYMCRYPT_SESSION tracks the most recently received Nonces in a series of +// successful decryptions. Nonces used in unsuccessful decryption calls do not update SYMCRYPT_SESSION. +// Information is tracked such that the decryption function can detect repeated Nonce values and +// fail decryption in this case. In order for this to work the message numbers that are provided +// to decrypt calls must be somewhat ordered. Provided message numbers may be arbitrarily far ahead +// of previously successfully decrypted message numbers, but may only be up to 63 behind the highest +// message number successfully decrypted so far. +// messageNumber normally represents the highest message number used in a successful decryption in +// this session. 
(The exception is at initialization, where messageNumber is initialized to 64 +// without the corresponding 0th bit in the replayMask being set - this initial state represents +// there have been no successful decryptions yet, and that the earliest messageNumber that can be +// successfully received is 1) +// replayMask represents whether a window of 64 message numbers up to messageNumber have already been +// successfully used; +// bit n of replayMask (from n=0 to n=63) represents message number = (messageNumber-n), 0 means not +// yet used, and 1 means already used in a successful decryption call +// + +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 +#define SYMCRYPT_USE_CAS128 (1) + +// For CompareAndSwap128 method, SYMCRYPT_SESSION must be aligned to 16B +#define SYMCRYPT_ALIGN_SESSION SYMCRYPT_ALIGN_TYPE_AT(struct, 16) +#else +#define SYMCRYPT_USE_CAS128 (0) + +// For method with only 64-bit atomics, SYMCRYPT_SESSION must be aligned to 8B +#define SYMCRYPT_ALIGN_SESSION SYMCRYPT_ALIGN_TYPE_AT(struct, 8) +#endif + +// Nested struct used within SYMCRYPT_SESSION +typedef SYMCRYPT_ALIGN_SESSION _SYMCRYPT_SESSION_REPLAY_STATE { + UINT64 replayMask; + // 64 bit mask representing message numbers previously successfully decrypted up to 63 + // before the most recent message number. 
+ + UINT64 messageNumber; + // the last 8 bytes of the Nonce (MSB-first) +} SYMCRYPT_SESSION_REPLAY_STATE, * PSYMCRYPT_SESSION_REPLAY_STATE; +typedef const SYMCRYPT_SESSION_REPLAY_STATE * PCSYMCRYPT_SESSION_REPLAY_STATE; + +typedef SYMCRYPT_ALIGN_SESSION _SYMCRYPT_SESSION { + SYMCRYPT_SESSION_REPLAY_STATE replayState; + // nested replayState struct is to improve code clarity in SymCryptSessionDecryptUpdate* + + UINT32 senderId; + // the first 4 bytes of the Nonce (MSB-first) + // (set by the caller and constant for the lifetime of a session) + + UINT32 flags; + // SYMCRYPT_FLAG_SESSION_ENCRYPT indicates the struct is to be used for an encryption session, + // otherwise the struct is to be used for a decryption session + + PVOID pMutex; + // Pointer to a fast single-process mutex object used to enable atomic update of replayMask and + // messageNumber in the absence of support for a 128b CAS operation +} SYMCRYPT_SESSION, * PSYMCRYPT_SESSION; + +#define SYMCRYPT_SESSION_MAX_MESSAGE_NUMBER (0xffffffff00000000ull) +// We do not allow messageNumber to go above some maximum value (currently 2^64 - 2^32) +// This gives us a large window to prevent many concurrent encryption threads from updating the +// session such that the messageNumber overflows and the same IV is used in many encryptions +// (i.e. we would only potentially get a spurious success using a repeated IV when there are +// >2^32 concurrent threads!) + +#if SYMCRYPT_USE_CAS128 +C_ASSERT(SYMCRYPT_FIELD_OFFSET(SYMCRYPT_SESSION, replayState.replayMask) == 0); +C_ASSERT(SYMCRYPT_FIELD_OFFSET(SYMCRYPT_SESSION, replayState.messageNumber) == 8); +// For CompareAndSwap128 method, replayMask and messageNumber must be tightly packed +#endif + +// +// RC4 +// + +// +// Some CPUs like the S array type to be larger than BYTE. We abstract the data type +// of the S array to accommodate such CPUs in future. 
+// + +typedef BYTE SYMCRYPT_RC4_S_TYPE; + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_RC4_STATE { + SYMCRYPT_RC4_S_TYPE S[256]; + BYTE i; + BYTE j; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_RC4_STATE, *PSYMCRYPT_RC4_STATE; + +// +// ChaCha20 +// + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_CHACHA20_STATE { + UINT32 key[8]; + UINT32 nonce[3]; + UINT64 offset; // offset to use for next operation + BOOLEAN keystreamBufferValid; // keystream buffer matches offset value + BYTE keystream[64]; +} SYMCRYPT_CHACHA20_STATE, *PSYMCRYPT_CHACHA20_STATE; + + +// +// AES_CTR_DRBG +// + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_RNG_AES_STATE { + // + // Key and V value are in one array, to allow fast generation of both of them + // in a single call. + // + BYTE keyAndV[32 + 16]; + BYTE previousBlock[16]; + UINT64 requestCounter; // called reseed_counter in SP 800-90 + BOOLEAN fips140_2Check; // set if the FIPS 140-2 continuous self-test is required + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_RNG_AES_STATE, * PSYMCRYPT_RNG_AES_STATE; + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_RNG_AES_FIPS140_2_STATE { + SYMCRYPT_RNG_AES_STATE rng; +} SYMCRYPT_RNG_AES_FIPS140_2_STATE, *PSYMCRYPT_RNG_AES_FIPS140_2_STATE; + + +// +// MARVIN32 +// + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_MARVIN32_EXPANDED_SEED +{ + UINT32 s[2]; + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_MARVIN32_EXPANDED_SEED, *PSYMCRYPT_MARVIN32_EXPANDED_SEED; +typedef const SYMCRYPT_MARVIN32_EXPANDED_SEED * PCSYMCRYPT_MARVIN32_EXPANDED_SEED; + + +typedef SYMCRYPT_MARVIN32_EXPANDED_SEED SYMCRYPT_MARVIN32_CHAINING_STATE, * PSYMCRYPT_MARVIN32_CHAINING_STATE; + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_MARVIN32_STATE +{ + SYMCRYPT_ALIGN BYTE buffer[8]; // 4 bytes of data, 4 more bytes for final padding + SYMCRYPT_MARVIN32_CHAINING_STATE chain; // chaining state + PCSYMCRYPT_MARVIN32_EXPANDED_SEED pSeed; // + UINT32 dataLength; // length of the data processed so far, mod 2^32 + SYMCRYPT_MAGIC_FIELD +} SYMCRYPT_MARVIN32_STATE, *PSYMCRYPT_MARVIN32_STATE; +typedef 
const SYMCRYPT_MARVIN32_STATE *PCSYMCRYPT_MARVIN32_STATE; + + +// +// Export blob sizes +// + +#define SYMCRYPT_MD2_STATE_EXPORT_SIZE (80) +#define SYMCRYPT_MD4_STATE_EXPORT_SIZE (116) +#define SYMCRYPT_MD5_STATE_EXPORT_SIZE (116) +#define SYMCRYPT_SHA1_STATE_EXPORT_SIZE (120) +#define SYMCRYPT_SHA224_STATE_EXPORT_SIZE (132) +#define SYMCRYPT_SHA256_STATE_EXPORT_SIZE (132) +#define SYMCRYPT_SHA384_STATE_EXPORT_SIZE (236) +#define SYMCRYPT_SHA512_STATE_EXPORT_SIZE (236) +#define SYMCRYPT_SHA512_224_STATE_EXPORT_SIZE (236) +#define SYMCRYPT_SHA512_256_STATE_EXPORT_SIZE (236) + +#define SYMCRYPT_KECCAK_STATE_EXPORT_SIZE (234) +#define SYMCRYPT_SHA3_224_STATE_EXPORT_SIZE SYMCRYPT_KECCAK_STATE_EXPORT_SIZE +#define SYMCRYPT_SHA3_256_STATE_EXPORT_SIZE SYMCRYPT_KECCAK_STATE_EXPORT_SIZE +#define SYMCRYPT_SHA3_384_STATE_EXPORT_SIZE SYMCRYPT_KECCAK_STATE_EXPORT_SIZE +#define SYMCRYPT_SHA3_512_STATE_EXPORT_SIZE SYMCRYPT_KECCAK_STATE_EXPORT_SIZE + + +// +// KDF algorithms +// + +// +// PBKDF2 +// + +typedef struct _SYMCRYPT_PBKDF2_EXPANDED_KEY { + SYMCRYPT_MAC_EXPANDED_KEY macKey; + PCSYMCRYPT_MAC macAlg; +} SYMCRYPT_PBKDF2_EXPANDED_KEY, *PSYMCRYPT_PBKDF2_EXPANDED_KEY; +typedef const SYMCRYPT_PBKDF2_EXPANDED_KEY *PCSYMCRYPT_PBKDF2_EXPANDED_KEY; + +// +// SP 800-108 +// + +typedef struct _SYMCRYPT_SP800_108_EXPANDED_KEY { + SYMCRYPT_MAC_EXPANDED_KEY macKey; + PCSYMCRYPT_MAC macAlg; +} SYMCRYPT_SP800_108_EXPANDED_KEY, *PSYMCRYPT_SP800_108_EXPANDED_KEY; +typedef const SYMCRYPT_SP800_108_EXPANDED_KEY *PCSYMCRYPT_SP800_108_EXPANDED_KEY; + +// +// TLS PRF 1.1 +// + +typedef struct _SYMCRYPT_TLSPRF1_1_EXPANDED_KEY { + SYMCRYPT_HMAC_MD5_EXPANDED_KEY macMd5Key; + SYMCRYPT_HMAC_SHA1_EXPANDED_KEY macSha1Key; +} SYMCRYPT_TLSPRF1_1_EXPANDED_KEY, *PSYMCRYPT_TLSPRF1_1_EXPANDED_KEY; +typedef const SYMCRYPT_TLSPRF1_1_EXPANDED_KEY *PCSYMCRYPT_TLSPRF1_1_EXPANDED_KEY; + +// +// TLS PRF 1.2 +// + +typedef struct _SYMCRYPT_TLSPRF1_2_EXPANDED_KEY { + SYMCRYPT_MAC_EXPANDED_KEY macKey; + PCSYMCRYPT_MAC 
macAlg; +} SYMCRYPT_TLSPRF1_2_EXPANDED_KEY, *PSYMCRYPT_TLSPRF1_2_EXPANDED_KEY; +typedef const SYMCRYPT_TLSPRF1_2_EXPANDED_KEY *PCSYMCRYPT_TLSPRF1_2_EXPANDED_KEY; + +// +// SSH-KDF +// +typedef struct _SYMCRYPT_SSHKDF_EXPANDED_KEY { + PCSYMCRYPT_HASH pHashFunc; + SYMCRYPT_HASH_STATE hashState; +} SYMCRYPT_SSHKDF_EXPANDED_KEY, *PSYMCRYPT_SSHKDF_EXPANDED_KEY; +typedef const SYMCRYPT_SSHKDF_EXPANDED_KEY *PCSYMCRYPT_SSHKDF_EXPANDED_KEY; + +// +// SRTP-KDF +// +typedef struct _SYMCRYPT_SRTPKDF_EXPANDED_KEY { + SYMCRYPT_AES_EXPANDED_KEY aesExpandedKey; +} SYMCRYPT_SRTPKDF_EXPANDED_KEY, *PSYMCRYPT_SRTPKDF_EXPANDED_KEY; +typedef const SYMCRYPT_SRTPKDF_EXPANDED_KEY *PCSYMCRYPT_SRTPKDF_EXPANDED_KEY; + +// +// HKDF +// + +typedef struct _SYMCRYPT_HKDF_EXPANDED_KEY { + SYMCRYPT_MAC_EXPANDED_KEY macKey; + PCSYMCRYPT_MAC macAlg; +} SYMCRYPT_HKDF_EXPANDED_KEY, *PSYMCRYPT_HKDF_EXPANDED_KEY; +typedef const SYMCRYPT_HKDF_EXPANDED_KEY *PCSYMCRYPT_HKDF_EXPANDED_KEY; + +// +// SSKDF +// +typedef struct _SYMCRYPT_SSKDF_MAC_EXPANDED_SALT { + SYMCRYPT_MAC_EXPANDED_KEY macKey; + PCSYMCRYPT_MAC macAlg; +} SYMCRYPT_SSKDF_MAC_EXPANDED_SALT, *PSYMCRYPT_SSKDF_MAC_EXPANDED_SALT; +typedef const SYMCRYPT_SSKDF_MAC_EXPANDED_SALT *PCSYMCRYPT_SSKDF_MAC_EXPANDED_SALT; + +// +// Digit & alignment sizes. +// +// WARNING: do not change these without updating all the optimized code, +// including assembler code. +// The FDEF_DIGIT_SIZE is the digit size used by the FDEF format. +// +#if SYMCRYPT_CPU_AMD64 + +#define SYMCRYPT_FDEF_DIGIT_SIZE 64 +#define SYMCRYPT_ASYM_ALIGN_VALUE 32 + +#elif SYMCRYPT_CPU_ARM64 + +#define SYMCRYPT_FDEF_DIGIT_SIZE 32 +#define SYMCRYPT_ASYM_ALIGN_VALUE 32 + +#else + +#define SYMCRYPT_FDEF_DIGIT_SIZE 16 +#define SYMCRYPT_ASYM_ALIGN_VALUE 16 // We have some bugs when ASYM_ALIGN_VALUE > DIGIT_SIZE; need to fix them if we implement AVX2-based x86 code. 
+ +#endif + +#define SYMCRYPT_ASYM_ALIGN_UP( _p ) ((PBYTE) ( ((SIZE_T) (_p) + SYMCRYPT_ASYM_ALIGN_VALUE - 1) & ~(SYMCRYPT_ASYM_ALIGN_VALUE - 1 ) ) ) + + +//============================================================================================== +// Object types for low-level API +// +// INT integer in range 0..N for some N +// DIVISOR an integer > 0 that can be used to divide with. +// MODULUS a value M > 1 to use in modulo-M computations +// MODELEMENT An element in a modulo-M ring. +// ECPOINT A point on an elliptic curve. +// +// These objects are all aligned to SYMCRYPT_ASYM_ALIGN +// +#define SYMCRYPT_ASYM_ALIGN SYMCRYPT_ALIGN_AT(SYMCRYPT_ASYM_ALIGN_VALUE) +#if SYMCRYPT_MS_VC +#define SYMCRYPT_ASYM_ALIGN_STRUCT SYMCRYPT_ASYM_ALIGN struct +#elif SYMCRYPT_GNUC +#define SYMCRYPT_ASYM_ALIGN_STRUCT struct SYMCRYPT_ASYM_ALIGN +#else +#error Unknown compiler +#endif + +SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_INT; +typedef struct _SYMCRYPT_INT SYMCRYPT_INT; +typedef SYMCRYPT_INT * PSYMCRYPT_INT; +typedef const SYMCRYPT_INT * PCSYMCRYPT_INT; + +SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_DIVISOR; +typedef struct _SYMCRYPT_DIVISOR SYMCRYPT_DIVISOR; +typedef SYMCRYPT_DIVISOR * PSYMCRYPT_DIVISOR; +typedef const SYMCRYPT_DIVISOR * PCSYMCRYPT_DIVISOR; + +SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MODULUS; +typedef struct _SYMCRYPT_MODULUS SYMCRYPT_MODULUS; +typedef SYMCRYPT_MODULUS * PSYMCRYPT_MODULUS; +typedef const SYMCRYPT_MODULUS * PCSYMCRYPT_MODULUS; + +SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MODELEMENT; +typedef struct _SYMCRYPT_MODELEMENT SYMCRYPT_MODELEMENT; +typedef SYMCRYPT_MODELEMENT * PSYMCRYPT_MODELEMENT; +typedef const SYMCRYPT_MODELEMENT * PCSYMCRYPT_MODELEMENT; + +SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_ECPOINT; +typedef struct _SYMCRYPT_ECPOINT SYMCRYPT_ECPOINT; +typedef SYMCRYPT_ECPOINT * PSYMCRYPT_ECPOINT; +typedef const SYMCRYPT_ECPOINT * PCSYMCRYPT_ECPOINT; + + +// +// Arithmetic formats +// + +#define SYMCRYPT_ANYSIZE 1 // used to mark arrays of arbitrary size + 
+#define SYMCRYPT_FDEF_DIGIT_BITS (8*SYMCRYPT_FDEF_DIGIT_SIZE) +#define SYMCRYPT_FDEF_DIGITS_FROM_BITS( _bits ) ( \ + ((_bits)/ SYMCRYPT_FDEF_DIGIT_BITS) + \ + (( ((_bits) & (SYMCRYPT_FDEF_DIGIT_BITS-1)) + (SYMCRYPT_FDEF_DIGIT_BITS - 1) )/SYMCRYPT_FDEF_DIGIT_BITS) \ + ) + +#define SYMCRYPT_BYTES_FROM_BITS(bits) ( ( (bits) + 7 ) / 8 ) + +// The maximum number of bits in any integer value that the library supports. If the +// caller's input exceeds this bound then the integer object will not be created. +// The caller either must ensure the bound is not exceeded, or check for NULL before +// using created SymCrypt objects. +// The primary purpose of this limit is to avoid integer overflows in size computations. +// Having a reasonable upper bound avoids all size overflows, even on 32-bit CPUs +#define SYMCRYPT_INT_MAX_BITS ((UINT32)(1 << 20)) + +// +// Upper bound for the number of digits: this MUST be enforced at runtime +// on all Allocate, SizeOf, and Create calls which take as input a digit number. +// +// Using this upper bound and the SYMCRYPT_INT_MAX_BITS upper bound we can argue +// that no integer overflow on 32-bit sizes can happen. Note that the computed upper +// bounds are very loose and the actual values are much smaller. +// +#define SYMCRYPT_FDEF_UPB_DIGITS (SYMCRYPT_FDEF_DIGITS_FROM_BITS(SYMCRYPT_INT_MAX_BITS)) + + + + +// +// All of the following SYMCRYPT_FDEF_SIZEOF_XXX_FROM_YYY computations for the four +// main SymCrypt objects (INT, DIVISOR, MODULUS, MODELEMENT) return a value not +// larger than 2^19 if the inputs _nDigits and _bits are not larger than +// SYMCRYPT_FDEF_UPB_DIGITS and SYMCRYPT_INT_MAX_BITS respectively (For MODELEMENT this bound +// is 2^17). The latter bounds must be enforced at runtime for all calculations taking as inputs +// number of digits or bits. 
+// +// The 2^19 upper bound is derived from: +// - the maximum (byte) size of an "integer": 2^20 bits / 8 = 2^17 bytes +// - "sizeof" computations add up to less than 2^18 bytes ~ 262 Kb +// - the modulus object contains two "integers" +// + +// +// Type fields contain the following: +// lower 16 bits: offset into virtual table (if any) +// upper 16 bits: bits 16-23: 1-character object type. Bits 24-31: 1 char implementation type +// The upper bits allow objects to be recognized in memory, making debugging easier. +// + +SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_INT { + UINT32 type; + _Field_range_( 1, SYMCRYPT_FDEF_UPB_DIGITS ) UINT32 nDigits; // digit size depends on run-time decisions... + UINT32 cbSize; + + SYMCRYPT_MAGIC_FIELD + SYMCRYPT_ASYM_ALIGN union { + struct { + UINT32 uint32[SYMCRYPT_ANYSIZE]; // FDEF: array UINT32[nDigits * # uint32 per digit] + } fdef; + } ti; // we must have a name here. 'ti' stands for 'Type-Int', it helps catch type errors when type-casting macros are used. +}; + +#define SYMCRYPT_FDEF_INT_PUINT32( p ) (&(p)->ti.fdef.uint32[0]) + + +#define SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( _nDigits ) ((_nDigits) * SYMCRYPT_FDEF_DIGIT_SIZE + sizeof( SYMCRYPT_INT ) ) +#define SYMCRYPT_FDEF_SIZEOF_INT_FROM_BITS( _bits ) SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( SYMCRYPT_FDEF_DIGITS_FROM_BITS( _bits )) + +SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_DIVISOR { + UINT32 type; + _Field_range_( 1, SYMCRYPT_FDEF_UPB_DIGITS ) UINT32 nDigits; // digit size depends on run-time decisions... + UINT32 cbSize; + + UINT32 nBits; // # bits in divisor + + SYMCRYPT_MAGIC_FIELD + union{ + struct { + UINT64 W; // approximate inverse of the divisor. Some implementations will use 64 bits, others 32 bits. + } fdef; + } td; + SYMCRYPT_INT Int; // Having a full Int here uses more space, but allows any Divisor to still be used as an Int. 
+ // This structure is directly followed by the Int extension +}; + +#define SYMCRYPT_FDEF_SIZEOF_DIVISOR_FROM_DIGITS( _nDigits ) ((_nDigits) * SYMCRYPT_FDEF_DIGIT_SIZE + sizeof( SYMCRYPT_DIVISOR ) ) +#define SYMCRYPT_FDEF_SIZEOF_DIVISOR_FROM_BITS( _bits ) SYMCRYPT_FDEF_SIZEOF_DIVISOR_FROM_DIGITS( SYMCRYPT_FDEF_DIGITS_FROM_BITS( _bits )) + +SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MODULUS { + UINT32 type; + _Field_range_( 1, SYMCRYPT_FDEF_UPB_DIGITS ) UINT32 nDigits; // digit size depends on run-time decisions... + UINT32 cbSize; // Size of modulus object + + UINT32 flags; // The flags the modulus was created with + UINT32 cbModElement; // Size of one modElement + UINT64 inv64; // -1/modulus mod 2^64 (always set but only to a useful value when the modulus is odd) + + SYMCRYPT_MAGIC_FIELD + union{ + struct { + //UINT32 nUint32Used; // # 32-bit words used in representing numbers. modulus < 2^{32*nUint32Used}. + // only values used are nDigits * uint32-per-digit or specific smaller values for optimized implementations + PCUINT32 Rsqr; // R^2 mod modulus, in uint32 form, nUint32Used words. Stored after Divisor. R = 2^{32*nUint32Used} + } montgomery; + struct { + UINT32 k; // modulus = 2^<bitsize of modelement> - k + } pseudoMersenne; + } tm; // type specific data. Every Modulus can be used as a generic modulus, so no type-specific data for generic. 
+ + SYMCRYPT_DIVISOR Divisor; + // This structure is directly followed by: + // The extensions of the Divisor object + // and after that: + // FDEF: Rsqr as an array of UINT32, size = nDigits * digitsize + // FDEF: negDivisor as an array of UINT32, size = nDigits * digitsize +}; + +#define SYMCRYPT_FDEF_SIZEOF_MODULUS_FROM_DIGITS( _nDigits ) (sizeof( SYMCRYPT_MODULUS ) + SYMCRYPT_FDEF_SIZEOF_DIVISOR_FROM_DIGITS( _nDigits ) + (2 * _nDigits * SYMCRYPT_FDEF_DIGIT_SIZE) ) +#define SYMCRYPT_FDEF_SIZEOF_MODULUS_FROM_BITS( _bits ) SYMCRYPT_FDEF_SIZEOF_MODULUS_FROM_DIGITS(SYMCRYPT_FDEF_DIGITS_FROM_BITS( _bits )) + +SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MODELEMENT { + // ModElements just store the information without any header. This union makes this well-defined, and allows easy access. + union{ + UINT32 uint32[SYMCRYPT_ANYSIZE]; + } d; +}; + +#define SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_DIGITS( _nDigits ) ((_nDigits) * SYMCRYPT_FDEF_DIGIT_SIZE) +#define SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_BITS( _bits ) SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_DIGITS( SYMCRYPT_FDEF_DIGITS_FROM_BITS( _bits ) ) + +// +// Upper bound for scratch size computations for FDEF objects depending only on digits +// +// The following 14 scratch size computation macros are all of the form: +// Some SIZEOF macros + max( some other scratch macros ) +// and all depend on some number of digits. (Slight exceptions are +// INT_TO_MODULUS and INT_PRIME_GEN but they can fit into the below +// rationale.) 
+// +// One can see that the deepest recursion in these macros and the biggest +// return value is for +// INT_PRIME_GEN -> INT_MILLER_RABIN -> MODEXP -> +// COMMON_MOD_OPERATIONS -> SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_DIVMOD +// +// Using the 2^19 (2^17) bound on the sizeof computations the biggest contribution on the above chain is for MODEXP: +// ((1 << SYMCRYPT_FDEF_MAX_WINDOW_MODEXP) + 2) * SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_DIGITS( _nModDigits ) +// which is bounded above by +// (2^6 + 2) * 2^17 < 2^24 +// +// By doubling on each subsequent recursive call we get the conservative +// upper bound for all scratch size computation macros of 2^26. +// + +#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( _nDigits ) (16 * (_nDigits)) // unused currently, but this catches errors + +#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MUL( _nDigits ) (16 * (_nDigits)) // unused currently, but nonzero size catches errors + +#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_DIVMOD( _nSrcDigits, _nDivisorDigits ) ( (_nSrcDigits + 1) * SYMCRYPT_FDEF_DIGIT_SIZE ) + +#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_EXTENDED_GCD( _nDigits ) ( \ + 4 * SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( _nDigits ) + \ + SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( 2 * _nDigits ) + \ + 2 * SYMCRYPT_FDEF_SIZEOF_DIVISOR_FROM_DIGITS( _nDigits ) + \ + SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_DIVMOD( 2 * _nDigits, _nDigits ), \ + SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MUL( 2 * _nDigits ), \ + SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( _nDigits ) )) ) + +#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nModDigits ) \ + ( (2*(_nModDigits) * SYMCRYPT_FDEF_DIGIT_SIZE) + \ + SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_DIVMOD( 2*(_nModDigits), _nModDigits )) // for mult: tmp product + divmod scratch + +#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_CRT_GENERATION( _nDigits ) ( \ + 2*SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( _nDigits ) + \ + SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_EXTENDED_GCD( 
_nDigits ), \ + SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nDigits ) )) + +#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_CRT_SOLUTION( _nDigits ) ( \ + SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( _nDigits ) + \ + SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_DIGITS( _nDigits ) + \ + SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( 2*(_nDigits) ) + \ + SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nDigits ), \ + SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MUL( 2*(_nDigits) ) )) + +// The divisor-digits argument handed to INT_DIVMOD must be this macro's own parameter (_nDigits), never a caller-scope identifier. +#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_MODULUS( _nDigits ) ( \ + SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( _nDigits ),\ + (2*(_nDigits)+1) * SYMCRYPT_FDEF_DIGIT_SIZE + SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_DIVMOD( 2*(_nDigits) + 1, _nDigits )) ) + +#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODINV( _nModDigits ) ( \ + 4 * SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_DIGITS( _nModDigits ) + \ + 3 * SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( _nModDigits ) + \ + SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nModDigits ) ) + +#define SYMCRYPT_FDEF_MAX_WINDOW_MODEXP (6) + +#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODEXP( _nModDigits ) ( \ + ((1 << SYMCRYPT_FDEF_MAX_WINDOW_MODEXP) + 2) * SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_DIGITS( _nModDigits ) + \ + SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nModDigits ) ) + +#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_IS_POTENTIAL_PRIME( _nDigits ) (0) + +#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MILLER_RABIN( _nDigits ) ( \ + SYMCRYPT_FDEF_SIZEOF_MODULUS_FROM_DIGITS(_nDigits) + \ + 3*SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_DIGITS(_nDigits) + \ + SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS(_nDigits) + \ + SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_MODULUS(_nDigits), \ + SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS(_nDigits), \ + SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODEXP( _nDigits ) )) ) + +#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_IS_PRIME( _nDigits ) ( \ + SYMCRYPT_MAX( 
SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_IS_POTENTIAL_PRIME( _nDigits ), \ + SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MILLER_RABIN( _nDigits ) )) + +#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_PRIME_GEN( _nDigits ) ( \ + SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS * SYMCRYPT_FDEF_SIZEOF_DIVISOR_FROM_DIGITS( 1 ) + \ + SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( 1 ) + \ + SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( 1 ), \ + SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_DIVMOD( _nDigits, 1 ), \ + SYMCRYPT_MAX( SYMCRYPT_FDEF_SIZEOF_INT_FROM_DIGITS( _nDigits ), \ + SYMCRYPT_MAX( SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_IS_POTENTIAL_PRIME( _nDigits ), \ + SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MILLER_RABIN( _nDigits ) ))))) + +// +// Upper bound for SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODMULTIEXP +// +// _nBase and _nBitsExp are bounded by SYMCRYPT_MODMULTIEXP_MAX_NBASES = 8 and +// SYMCRYPT_MODMULTIEXP_MAX_NBITSEXP = 2^20. Therefore the upper bound on this computation +// is +// 2^21 + 2^3*(2^6+4)*2^17 + 2^3*2^20*4 < 2^27 +// +#define SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODMULTIEXP( _nModDigits, _nBases, _nBitsExp ) ( \ + SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nModDigits ) + \ + ((_nBases)*(1<<SYMCRYPT_FDEF_MAX_WINDOW_MODEXP) + 4)*SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_DIGITS( _nModDigits ) + \ + (((_nBases)*(_nBitsExp)*sizeof(UINT32) + SYMCRYPT_ASYM_ALIGN_VALUE - 1) & ~(SYMCRYPT_ASYM_ALIGN_VALUE - 1)) ) +// Note: We need +4 multiplied with SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_DIGITS so that SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODMULTIEXP +// is always at least 2 modelements bigger than SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODEXP (see modexp.c) + +// +// Support for masked operations + +#define SYMCRYPT_MASK32_SET ((UINT32)-1) +#define SYMCRYPT_MASK32_NONZERO( _v ) ((UINT32)(((UINT64)0 - (_v)) >> 32)) +#define SYMCRYPT_MASK32_ZERO( _v ) (~SYMCRYPT_MASK32_NONZERO( _v )) +#define SYMCRYPT_MASK32_EQ( _a, _b ) (~SYMCRYPT_MASK32_NONZERO( (_a) ^ (_b) )) +#define SYMCRYPT_MASK32_LT( _a, _b 
) ((UINT32)( ((UINT64)(_a) - (_b)) >> 32 )) + + +// +// Dispatch definitions +// When multiple formats are supported, this is where the information of the multiple formats is combined. +// +// See the comments in SYMCRYPT_FDEF_SCRATCH_XXX regarding 32 bit overflow protection. All results +// are bounded above by 2^27. +// + +#define SYMCRYPT_INTERNAL_SIZEOF_INT_FROM_BITS( _bitsize ) SYMCRYPT_FDEF_SIZEOF_INT_FROM_BITS( _bitsize ) +#define SYMCRYPT_INTERNAL_SIZEOF_DIVISOR_FROM_BITS( _bitsize ) SYMCRYPT_FDEF_SIZEOF_DIVISOR_FROM_BITS( _bitsize ) +#define SYMCRYPT_INTERNAL_SIZEOF_MODULUS_FROM_BITS( _bitsize ) SYMCRYPT_FDEF_SIZEOF_MODULUS_FROM_BITS( _bitsize ) +#define SYMCRYPT_INTERNAL_SIZEOF_MODELEMENT_FROM_BITS( _bitsize ) SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_BITS( _bitsize ) + +#define SYMCRYPT_INTERNAL_SIZEOF_RSAKEY_FROM_PARAMS( modBits, nPrimes, nPubExps ) SYMCRYPT_FDEF_SIZEOF_RSAKEY_FROM_PARAMS( modBits, nPrimes, nPubExps ) +// For now we don't need the pubExpBits so we drop them, but we might use them later. 
+ +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( _nDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( _nDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_MUL( _nDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MUL( _nDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_DIVMOD( _nSrcDigits, _nDivisorDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_DIVMOD( _nSrcDigits, _nDivisorDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_EXTENDED_GCD( _nDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_EXTENDED_GCD( _nDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nModDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nModDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_CRT_GENERATION( _nDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_CRT_GENERATION( _nDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_CRT_SOLUTION( _nDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_CRT_SOLUTION( _nDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_TO_MODULUS( _nDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_MODULUS( _nDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_MODINV( _nModDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODINV( _nModDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_MODEXP( _nModDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODEXP( _nModDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_IS_PRIME( _nDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_IS_PRIME( _nDigits ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_PRIME_GEN( _nDigits ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_PRIME_GEN( _nDigits ) + +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_MODMULTIEXP( _nModDigits, _nBases, _nBitsExp ) SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_MODMULTIEXP( _nModDigits, _nBases, _nBitsExp ) + +// +// Forward declarations for MlKemkey types +// +SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MLKEMKEY; +typedef struct _SYMCRYPT_MLKEMKEY SYMCRYPT_MLKEMKEY; +typedef SYMCRYPT_MLKEMKEY * PSYMCRYPT_MLKEMKEY; +typedef const SYMCRYPT_MLKEMKEY * 
PCSYMCRYPT_MLKEMKEY; + +// +// Forward declarations for MlDsakey types +// +struct _SYMCRYPT_MLDSAKEY; +typedef struct _SYMCRYPT_MLDSAKEY SYMCRYPT_MLDSAKEY; +typedef SYMCRYPT_MLDSAKEY * PSYMCRYPT_MLDSAKEY; +typedef const SYMCRYPT_MLDSAKEY * PCSYMCRYPT_MLDSAKEY; + +// +// Forward declarations for CompositeMlKemkey types +// +SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_COMPOSITE_MLKEMKEY; +typedef struct _SYMCRYPT_COMPOSITE_MLKEMKEY SYMCRYPT_COMPOSITE_MLKEMKEY; +typedef SYMCRYPT_COMPOSITE_MLKEMKEY * PSYMCRYPT_COMPOSITE_MLKEMKEY; +typedef const SYMCRYPT_COMPOSITE_MLKEMKEY * PCSYMCRYPT_COMPOSITE_MLKEMKEY; + +// +// RSA padding scratch definitions +// +// The maximum sizes of the state and the result for all hash algorithms are +// sizeof(SYMCRYPT_HASH_STATE) and SYMCRYPT_HASH_MAX_RESULT_SIZE, both not bigger than +// 2^20. All the nBytes inputs are bounded by 2^17 (the maximum byte-size +// of the RSA modulus). +// +// Thus a total upper bound on these results is 2^20. +// +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_RSA_OAEP( _hashAlgorithm, _nBytesOAEP ) ( SymCryptHashStateSize( _hashAlgorithm ) + \ + SymCryptHashResultSize( _hashAlgorithm ) + \ + 2*((_nBytesOAEP) - 1) ) + +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_RSA_PKCS1( _nBytesPKCS1 ) ( _nBytesPKCS1 ) + +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_RSA_PSS( _hashAlgorithm, _nBytesMessage, _nBytesPSS ) ( SymCryptHashStateSize( _hashAlgorithm ) + \ + (_nBytesMessage) + \ + 3*(_nBytesPSS) + 5 ) + +// +// RSAKEY Type +// + +#define SYMCRYPT_FDEF_SIZEOF_RSAKEY_FROM_PARAMS( modBits, nPrimes, nPubExps ) ( \ + sizeof( SYMCRYPT_RSAKEY ) + \ + ((nPrimes) + 1) * SYMCRYPT_FDEF_SIZEOF_MODULUS_FROM_BITS( modBits ) + \ + (nPrimes) * SYMCRYPT_FDEF_SIZEOF_MODELEMENT_FROM_BITS( modBits ) + \ + ((nPrimes) + 1) * (nPubExps) * SYMCRYPT_FDEF_SIZEOF_INT_FROM_BITS( modBits ) ) +// 1 modulus object per prime + 1 for the RSA modulus +// 1 modelement for every crtInverse +// 1 int per pubexp for each privexp + 1 int per prime*pubexp for each crtprivexp +// The expansion and its arguments are parenthesized so the macro stays correct inside larger expressions. + +#define 
SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES (2) +#define SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS (1) + +#define SYMCRYPT_RSAKEY_MIN_BITSIZE_MODULUS (256) // Some of our SCS code requires at least 32 bytes... +#define SYMCRYPT_RSAKEY_MAX_BITSIZE_MODULUS (1 << 16) // Avoid any integer overflows in size calculations + +// RSA FIPS self-tests require at least 496 bits to avoid fatal +// Require caller to specify NO_FIPS for up to 1024 bits as running FIPS tests on too-small keys +// does not make it FIPS certifiable and gives the wrong impression to callers +#define SYMCRYPT_RSAKEY_FIPS_MIN_BITSIZE_MODULUS (1024) + +#define SYMCRYPT_RSAKEY_MIN_BITSIZE_PRIME (128) +#define SYMCRYPT_RSAKEY_MAX_BITSIZE_PRIME (SYMCRYPT_RSAKEY_MAX_BITSIZE_MODULUS / 2) + +// Minimum allowable bit sizes for generated and imported parameters for +// the RSA modulus and each prime. + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_RSAKEY { + UINT32 fAlgorithmInfo; // Tracks which algorithms the key can be used in + // Also tracks which per-key selftests have been performed on this key + // A bitwise OR of SYMCRYPT_FLAG_KEY_*, SYMCRYPT_FLAG_RSAKEY_*, and + // SYMCRYPT_PCT_* values + + UINT32 cbTotalSize; // Total size of the rsa key + BOOLEAN hasPrivateKey; // Set to true if there is private key information set + + UINT32 nSetBitsOfModulus; // Bits of modulus specified during creation + + UINT32 nBitsOfModulus; // Number of bits of the value of the modulus (not the object's size) + UINT32 nDigitsOfModulus; // Number of digits of the modulus object (always equal to SymCryptDigitsFromBits(nSetBitsOfModulus)) + + UINT32 nPubExp; // Number of public exponents + + UINT32 nPrimes; // Number of primes, can be 0 if the object only supports public keys + UINT32 nBitsOfPrimes[SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES]; + // Number of bits of the value of each prime (not the object's size) + UINT32 nDigitsOfPrimes[SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES]; + // Number of digits of each prime object + UINT32 nMaxDigitsOfPrimes; // Maximum 
number of digits in nDigitsOfPrimes + + UINT64 au64PubExp[SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS]; + // SYMCRYPT_ASYM_ALIGN'ed buffers that point to memory allocated for each object + PBYTE pbPrimes[SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES]; + PBYTE pbCrtInverses[SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES]; + PBYTE pbPrivExps[SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS]; + PBYTE pbCrtPrivExps[SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS * SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES]; + + // SymCryptObjects + PSYMCRYPT_MODULUS pmModulus; // The modulus N=p*q + PSYMCRYPT_MODULUS pmPrimes[SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES]; + // Pointers to the secret primes + PSYMCRYPT_MODELEMENT peCrtInverses[SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES]; + // Pointers to the CRT inverses of the primes + PSYMCRYPT_INT piPrivExps[SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS]; + // Pointers to the corresponding private exponents + PSYMCRYPT_INT piCrtPrivExps[SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS * SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES]; + // Pointers to the private exponents modulo each prime minus 1 (for CRT) + + SYMCRYPT_MAGIC_FIELD + // Followed by: + // Modulus + // Primes + // CrtInverses + // PrivExps + // CrtPrivExps +} SYMCRYPT_RSAKEY; +typedef SYMCRYPT_RSAKEY * PSYMCRYPT_RSAKEY; +typedef const SYMCRYPT_RSAKEY * PCSYMCRYPT_RSAKEY; + +// +// The following definitions relating to trial division are not needed by normal callers +// but are used by the test program to measure performance of components. +// + +typedef struct _SYMCRYPT_TRIALDIVISION_PRIME { + UINT64 invMod2e64; // Inverse of prime modulo 2^64 + UINT64 compareLimit; // floor( (2^{64}-1)/ prime ) +} SYMCRYPT_TRIALDIVISION_PRIME, *PSYMCRYPT_TRIALDIVISION_PRIME; +typedef const SYMCRYPT_TRIALDIVISION_PRIME * PCSYMCRYPT_TRIALDIVISION_PRIME; +// +// This structure is used to test whether a UINT64 is a multiple of a (small) prime. +// Let V be the input value, P the small prime, and W the inverse of P modulo 2^64. +// If V = k*P then V * W mod 2^64 = V/P mod 2^64 = k. 
+// This holds for k = 0, 1, ..., floor( (2^{64}-1)/P ). +// If V is not a multiple of P then the result of the multiplication must be larger than that. +// + +typedef struct _SYMCRYPT_TRIALDIVISION_GROUP { + UINT32 nPrimes; // # of primes in this group (use the next ones) + UINT32 factor[9]; // factor[i] = 2^{32*(i+1)} mod Prod where Prod = product of the primes + // It is guaranteed that Prod <= (2^{32}-1)/9 +} SYMCRYPT_TRIALDIVISION_GROUP, *PSYMCRYPT_TRIALDIVISION_GROUP; +typedef const SYMCRYPT_TRIALDIVISION_GROUP * PCSYMCRYPT_TRIALDIVISION_GROUP; + + +typedef struct _SYMCRYPT_TRIALDIVISION_CONTEXT { + SIZE_T nBytesAlloc; + UINT32 maxTrialPrime; + PSYMCRYPT_TRIALDIVISION_GROUP pGroupList; // terminated with 0 record + PSYMCRYPT_TRIALDIVISION_PRIME pPrimeList; // terminated with 0 record + PUINT32 pPrimes; // terminated with a 0. + SYMCRYPT_TRIALDIVISION_PRIME Primes3_5_17[3]; // Structures for 3, 5 and 17 in that order +} SYMCRYPT_TRIALDIVISION_CONTEXT, *PSYMCRYPT_TRIALDIVISION_CONTEXT; +typedef const SYMCRYPT_TRIALDIVISION_CONTEXT * PCSYMCRYPT_TRIALDIVISION_CONTEXT; + +UINT32 +SymCryptTestTrialdivisionMaxSmallPrime( PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext ); // Expose small prime limit to help test code + +// +// DLGROUP type +// + +#define SYMCRYPT_DLGROUP_MIN_BITSIZE_P (32) +#define SYMCRYPT_DLGROUP_MIN_BITSIZE_Q (31) // Q must always be at least 1 bit shorter than P +// Minimum allowable bit sizes for generated and imported parameters for both P and +// Q primes. 
+ +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_DLGROUP { + UINT32 cbTotalSize; // Total size of the dl group object + BOOLEAN fHasPrimeQ; // Flag that specifies whether the object has a Q parameter + + UINT32 nBitsOfP; // Number of bits of the value of P (not the object's size) + UINT32 cbPrimeP; // Number of bytes of the value of P (not the object's size), equal to ceil(nBitsOfP/8) + UINT32 nDigitsOfP; // Number of digits of the object of prime P + UINT32 nMaxBitsOfP; // Maximum number of bits of the value of P + + UINT32 nBitsOfQ; // Number of bits of the value of Q (not the object's bits) + UINT32 cbPrimeQ; // Number of bytes of the value of Q (not the object's size), equal to ceil(nBitsOfQ/8) + UINT32 nDigitsOfQ; // Number of digits of the object of prime Q + UINT32 nMaxBitsOfQ; // Maximum number of bits of the value of Q + + BOOLEAN isSafePrimeGroup; // Boolean indicating if this is a Safe Prime group + UINT32 nMinBitsPriv; // Minimum number of bits to be used in private keys for this group + // This only applies to named Safe Prime groups where this is related to the security strength + // i.e. this corresponds to 2s in SP800-56arev3 5.6.1.1.1 / 5.6.2.1.2 + UINT32 nDefaultBitsPriv; // Default number of bits used in private keys for this group + // Normally equals nBitsOfQ, but may be further restricted (i.e. for named Safe Prime groups) + // i.e. this corresponds to a default value of N in SP800-56arev3 5.6.1.1.1 / 5.6.2.1.2 + + UINT32 nBitsOfSeed; // Number of bits of the seed used for generation (seedlen in FIPS 186-3) + UINT32 cbSeed; // Number of bytes of the seed, equal to ceil(nBitsOfSeed/8) + + SYMCRYPT_DLGROUP_FIPS eFipsStandard; // Code specifying the FIPS standard used to create the keys. If 0 the group is unverified. 
+ + PCSYMCRYPT_HASH pHashAlgorithm; // Hash algorithm used for the generation of parameters + UINT32 dwGenCounter; // Number of iterations used for the generation of parameters + BYTE bIndexGenG; // Index for the generation of generator G (FIPS 186-3) (Always 1 for now) + + PBYTE pbQ; // SYMCRYPT_ASYM_ALIGN'ed buffer that points to the memory allocated for modulus Q + + PSYMCRYPT_MODULUS pmP; // Pointer to the prime P + PSYMCRYPT_MODULUS pmQ; // Pointer to the prime Q + + PSYMCRYPT_MODELEMENT peG; // Pointer to the generator G + + PBYTE pbSeed; // Buffer that will hold the seed (this is padded at the end so that the entire structure + // has size a multiple of SYMCRYPT_ASYM_ALIGN_VALUE) + + SYMCRYPT_MAGIC_FIELD + + // P + // Q + // G + // Seed +} SYMCRYPT_DLGROUP; +typedef SYMCRYPT_DLGROUP * PSYMCRYPT_DLGROUP; +typedef const SYMCRYPT_DLGROUP * PCSYMCRYPT_DLGROUP; + +// +// DLKEY type +// +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_DLKEY { + UINT32 fAlgorithmInfo; // Tracks which algorithms the key can be used in + // Also tracks which per-key selftests have been performed on this key + // A bitwise OR of SYMCRYPT_FLAG_KEY_*, SYMCRYPT_FLAG_DLKEY_*, and + // SYMCRYPT_PCT_* values + + BOOLEAN fHasPrivateKey; // Set to true if there is a private key set + BOOLEAN fPrivateModQ; // Set to true if the private key is at most Q-1, otherwise it is at most P-2 + UINT32 nBitsPriv; // Number of bits used in private keys + + PCSYMCRYPT_DLGROUP pDlgroup; // Handle to the group which created the key + + PBYTE pbPrivate; // SYMCRYPT_ASYM_ALIGN'ed buffer that points to the memory allocated for the private key + + PSYMCRYPT_MODELEMENT pePublicKey; // Public key (modelement modulo P) + PSYMCRYPT_INT piPrivateKey; // Private key (integer up to 2^nBitsPriv-1, Q-1 or P-2) + + SYMCRYPT_MAGIC_FIELD + + // PublicKey + // PrivateKey // The size of this must always be the same as the size of P +} SYMCRYPT_DLKEY; +typedef SYMCRYPT_DLKEY * PSYMCRYPT_DLKEY; +typedef const SYMCRYPT_DLKEY * 
PCSYMCRYPT_DLKEY; + +// +// Elliptic Curve Function Types +// + +#define SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH 4 // Number of MODELEMENTs for the largest ECPOINT format + +// Coordinate representations for ECPOINTs +// NOTE: The value masked with 0xf gives you the number of coordinates +typedef enum _SYMCRYPT_ECPOINT_COORDINATES { + SYMCRYPT_ECPOINT_COORDINATES_INVALID = 0x00, // Invalid point representation + SYMCRYPT_ECPOINT_COORDINATES_SINGLE = 0x11, // Representation with only X + SYMCRYPT_ECPOINT_COORDINATES_AFFINE = 0x22, // Affine representation (X,Y) + SYMCRYPT_ECPOINT_COORDINATES_PROJECTIVE = 0x33, // Three equally-sized values where the triple (X,Y,Z) represents the affine point (X/Z, Y/Z) + SYMCRYPT_ECPOINT_COORDINATES_JACOBIAN = 0x43, // Three equally-sized values where the triple (X,Y,Z) represents the affine point (X/Z^2, Y/Z^3) + SYMCRYPT_ECPOINT_COORDINATES_EXTENDED_PROJECTIVE = 0x54, // Four equally-sized values where (X,Y,Z,T) represents the affine point (X/Z, Y/Z) with T=X*Y*Z + SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE = 0x62, // Two equally-sized values where (X,Z) represents the point (X/Z) +} SYMCRYPT_ECPOINT_COORDINATES; + +#define SYMCRYPT_INTERNAL_NUMOF_COORDINATES( _eCoordinates ) ((_eCoordinates) & 0xf) + + +// +// Curve-type-dependent information +// + +// Short-Weierstrass + +#define SYMCRYPT_ECURVE_SW_DEF_WINDOW (6) // Default window size for the windowed methods + +#define SYMCRYPT_ECURVE_SW_MAX_NPRECOMP_POINTS (64) // Maximum number of precomputed points + +typedef struct _SYMCRYPT_ECURVE_INFO_PRECOMP { + UINT32 window; // Window size + UINT32 nPrecompPoints; // Number of precomputed points + UINT32 nRecodedDigits; // Number of recoded digits + PSYMCRYPT_ECPOINT poPrecompPoints[SYMCRYPT_ECURVE_SW_MAX_NPRECOMP_POINTS]; + // Table of pointers to precomputed powers of the distinguished point +} SYMCRYPT_ECURVE_INFO_PRECOMP; + +// +// ECURVE object +// + +#define SYMCRYPT_ECURVE_MIN_BITSIZE_FMOD (32) +#define 
SYMCRYPT_ECURVE_MIN_BITSIZE_GORD (32) +#define SYMCRYPT_ECURVE_MAX_COFACTOR_POWER (8) +// Minimum (maximum for cofactor) allowable bit sizes for imported +// parameters for field modulus, group order of curve (and cofactor). + +#define SYMCRYPT_INTERNAL_ECURVE_VERSION_LATEST 1 + +typedef enum _SYMCRYPT_INTERNAL_ECURVE_TYPE { + SYMCRYPT_INTERNAL_ECURVE_TYPE_SHORT_WEIERSTRASS = 1, + SYMCRYPT_INTERNAL_ECURVE_TYPE_TWISTED_EDWARDS = 2, + SYMCRYPT_INTERNAL_ECURVE_TYPE_MONTGOMERY = 3, + SYMCRYPT_INTERNAL_ECURVE_TYPE_SHORT_WEIERSTRASS_AM3 = 4,// This type is a specialization of Short-Weierstrass when A == -3 + // This condition is detected and used for all NIST prime curves +} SYMCRYPT_INTERNAL_ECURVE_TYPE; + +C_ASSERT((UINT32)SYMCRYPT_INTERNAL_ECURVE_TYPE_SHORT_WEIERSTRASS == (UINT32)SYMCRYPT_ECURVE_TYPE_SHORT_WEIERSTRASS ); +C_ASSERT((UINT32)SYMCRYPT_INTERNAL_ECURVE_TYPE_TWISTED_EDWARDS == (UINT32)SYMCRYPT_ECURVE_TYPE_TWISTED_EDWARDS ); +C_ASSERT((UINT32)SYMCRYPT_INTERNAL_ECURVE_TYPE_MONTGOMERY == (UINT32)SYMCRYPT_ECURVE_TYPE_MONTGOMERY ); + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_ECURVE { + UINT32 version; // Version # + SYMCRYPT_INTERNAL_ECURVE_TYPE + type; // Internal type of the curve + SYMCRYPT_ECPOINT_COORDINATES + eCoordinates; // Default representation of the EC points + + UINT32 FModBitsize; // Bitsize of the field modulus + UINT32 FModDigits; // Number of digits of the field modulus + UINT32 FModBytesize; // Bytesize of the field modulus (specified in the curve parameters as cbFieldLength) + + UINT32 GOrdBitsize; // Bitsize of the (sub)group order + UINT32 GOrdDigits; // Number of digits of the (sub)group order + UINT32 GOrdBytesize; // Bytesize of the (sub)group order (specified in the curve parameters as cbSubgroupOrder) + + UINT32 cbModElement; // (Internal) bytesize of one mod element + + UINT32 cbAlloc; // Bytesize of the total curve blob + + UINT32 cbScratchCommon; // Size of scratch space for common ecurve operations + UINT32 cbScratchScalar; // 
Size of constant scratch space for scalar ecurve operations (without the nPoints dependence) + UINT32 cbScratchScalarMulti; // Dependence of scratch space for scalar ecurve operations from nPoints + UINT32 cbScratchGetSetValue; // Size of scratch space for get set value ecpoint operations + UINT32 cbScratchEckey; // Size of scratch space for eckey operations + + UINT32 coFactorPower; // The cofactor of the curve will be equal to 2^coFactorPower + + // Parameters V2 Extensions + UINT32 PrivateKeyDefaultFormat; + UINT32 HighBitRestrictionNumOfBits; + UINT32 HighBitRestrictionPosition; + UINT32 HighBitRestrictionValue; + + union { + + SYMCRYPT_ECURVE_INFO_PRECOMP sw; // Info for short Weierstrass curves (only the precomputation parameters are needed now) + + } info; // Precomputed information related to each curve + + PSYMCRYPT_MODULUS FMod; // Field modulus + PSYMCRYPT_MODULUS GOrd; // Order of the subgroup + + PSYMCRYPT_MODELEMENT A; // Parameter A + PSYMCRYPT_MODELEMENT B; // Parameter B + PSYMCRYPT_ECPOINT G; // Distinguished point (generator of the subgroup) + PSYMCRYPT_INT H; // Cofactor of the curve + + SYMCRYPT_MAGIC_FIELD + + // FMod + // A + // B + // GOrd + // H + // G +} SYMCRYPT_ECURVE; +typedef SYMCRYPT_ECURVE * PSYMCRYPT_ECURVE; +typedef const SYMCRYPT_ECURVE * PCSYMCRYPT_ECURVE; + +#define SYMCRYPT_INTERNAL_ECPOINT_COORDINATE_OFFSET( _pCurve, _ord ) ( sizeof(SYMCRYPT_ECPOINT) + (_ord) * (_pCurve)->cbModElement ) +#define SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( _ord, _pCurve, _pEcpoint ) (PSYMCRYPT_MODELEMENT)( (PBYTE)(_pEcpoint) + SYMCRYPT_INTERNAL_ECPOINT_COORDINATE_OFFSET( (_pCurve), _ord ) ) + +// Convenience macros to make adding internal specializations easier +#define SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE( _pCurve ) \ + ( _pCurve->type == SYMCRYPT_INTERNAL_ECURVE_TYPE_SHORT_WEIERSTRASS || \ + _pCurve->type == SYMCRYPT_INTERNAL_ECURVE_TYPE_SHORT_WEIERSTRASS_AM3 ) + +#define SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE( _pCurve ) \ + ( _pCurve->type == 
SYMCRYPT_INTERNAL_ECURVE_TYPE_TWISTED_EDWARDS ) + +#define SYMCRYPT_CURVE_IS_MONTGOMERY_TYPE( _pCurve ) \ + ( _pCurve->type == SYMCRYPT_INTERNAL_ECURVE_TYPE_MONTGOMERY ) + +// +// Scratch space sizes for ECURVE operations +// +// Overflow protection is enforced when creating the ECURVE objects on +// the cbScratchCommon, cbScratchScalar, cbScratchScalarMulti, and cbScratchEckey fields. +// +// All of them are upper bounded by 2^26 (see SymCrypt<CurveType>FillScratchSpaces functions) +// and since _nPoints is bounded by SYMCRYPT_ECURVE_MULTI_SCALAR_MUL_MAX_NPOINTS = 2, all +// the macros are bounded by 2^27. +// + +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( _pCurve ) ( (_pCurve)->cbScratchCommon) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( _pCurve, _nPoints ) ( (_pCurve)->cbScratchScalar + \ + (_nPoints) * (_pCurve)->cbScratchScalarMulti ) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( _pCurve ) ( (_pCurve)->cbScratchGetSetValue) +#define SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_ECKEY_ECURVE_OPERATIONS( _pCurve ) ( (_pCurve)->cbScratchEckey) + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_ECPOINT { + BOOLEAN normalized; // A flag specifying whether the point is normalized or not. This flag + // makes sense only for PROJECTIVE, JACOBIAN, EXTENDED_PROJECTIVE, and + // SINGLE_PROJECTIVE coordinates. If set to TRUE (non-zero), it means + // that the Z coordinate of the point is equal to 1. + PCSYMCRYPT_ECURVE pCurve; // Handle to the curve which the point is on. Only used in CHKed builds for ASSERTs + SYMCRYPT_MAGIC_FIELD + // An array of MODELEMENTs. The total size will depend on the MODELEMENT size and the number of MODELEMENTs. 
+} SYMCRYPT_ECPOINT, *PSYMCRYPT_ECPOINT; +typedef const SYMCRYPT_ECPOINT * PCSYMCRYPT_ECPOINT; + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_ECKEY { + UINT32 fAlgorithmInfo; // Tracks which algorithms the key can be used in + // Also tracks which per-key selftests have been performed on this key + // A bitwise OR of SYMCRYPT_FLAG_KEY_*, SYMCRYPT_FLAG_ECKEY_*, and + // SYMCRYPT_PCT_* values + BOOLEAN hasPrivateKey; // Set to true if there is a private key set + PCSYMCRYPT_ECURVE pCurve; // Handle to the curve which created the key + + PSYMCRYPT_ECPOINT poPublicKey; // Public key (ECPOINT) + PSYMCRYPT_INT piPrivateKey; // Private key + + SYMCRYPT_MAGIC_FIELD + + // PublicKey + // PrivateKey +} SYMCRYPT_ECKEY; +typedef SYMCRYPT_ECKEY * PSYMCRYPT_ECKEY; +typedef const SYMCRYPT_ECKEY * PCSYMCRYPT_ECKEY; + +SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_802_11_SAE_CUSTOM_STATE { + PSYMCRYPT_ECURVE pCurve; + PCSYMCRYPT_MAC macAlgorithm; + PSYMCRYPT_MODELEMENT peRand; + PSYMCRYPT_MODELEMENT peMask; + PSYMCRYPT_ECPOINT poPWE; + BYTE counter; +}; + +// +// XMSS +// + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_XMSS_PARAMS +{ + PCSYMCRYPT_HASH hash; // hash function + UINT32 id; // algorithm identifier + UINT32 cbHashOutput; // hash function output size, must be less than or equal to hash->resultSize + UINT32 nWinternitzWidth;// Winternitz coefficient, width of digits in bits (chain length = 2^nWinternitzWidth) + UINT32 nTotalTreeHeight;// number of layers times the tree height of one layer (each layer has the same height) + UINT32 nLayers; // hyper-tree layers, 1 for single tree + UINT32 cbPrefix; // length of the domain separator prefix in PRFs + + // + // The following are derived from the above + // + UINT32 len1; // number of w-bit digits in the hash output to be signed ( len1 = ceil(8n / w) ) + UINT32 len2; // number of w-bit digits in the checksum + UINT32 len; // len1 + len2 + UINT32 nLayerHeight; // tree height of a single layer (h / d) + UINT32 cbIdx; // size of leaf counter in 
bytes (for single trees cbIdx = 4) + UINT32 nLeftShift32; // left shift count to align the checksum digits to MSB of a 32-bit word + + BYTE Reserved[16]; // Reserved for future use +} SYMCRYPT_XMSS_PARAMS; + +typedef SYMCRYPT_XMSS_PARAMS* PSYMCRYPT_XMSS_PARAMS; +typedef const SYMCRYPT_XMSS_PARAMS* PCSYMCRYPT_XMSS_PARAMS; + +struct _SYMCRYPT_XMSS_KEY; +typedef struct _SYMCRYPT_XMSS_KEY SYMCRYPT_XMSS_KEY; +typedef SYMCRYPT_XMSS_KEY* PSYMCRYPT_XMSS_KEY; +typedef const SYMCRYPT_XMSS_KEY* PCSYMCRYPT_XMSS_KEY; + + +//========================================================================== +// LMS internal structures +//========================================================================== + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_LMS_PARAMS +{ + // algorithm ID of the LMS signature scheme + UINT32 lmsAlgID; + + // algorithm ID of the LM-OTS signature scheme + UINT32 lmsOtsAlgID; + + // hash function pointer to be used as part of the LMS operations + PCSYMCRYPT_HASH pLmsHashFunction; + + // the height of the LMS tree. 
There are 2^h leaves in the tree - h + UINT32 nTreeHeight; + + // the number of bytes for each tree node, equals to the output length of the hash function - m, n + UINT32 cbHashOutput; + + // Winternitz coefficient, width of digits in bits (chain length = 2^w) - w + UINT32 nWinternitzChainWidth; + + // the number of n-byte string elements that make up the LM-OTS signature - p + UINT32 nByteStringCount; + + // the number of left-shift bits used in the checksum function Cksm - ls + UINT32 nChecksumLShiftBits; +} SYMCRYPT_LMS_PARAMS; +typedef SYMCRYPT_LMS_PARAMS* PSYMCRYPT_LMS_PARAMS; +typedef const SYMCRYPT_LMS_PARAMS* PCSYMCRYPT_LMS_PARAMS; + +struct _SYMCRYPT_LMS_KEY; +typedef struct _SYMCRYPT_LMS_KEY SYMCRYPT_LMS_KEY; +typedef SYMCRYPT_LMS_KEY* PSYMCRYPT_LMS_KEY; +typedef const SYMCRYPT_LMS_KEY* PCSYMCRYPT_LMS_KEY; + +#ifndef _PREFAST_ +#if SYMCRYPT_CPU_X86 +#pragma warning(pop) +#endif +#endif + + + +////////////////////////////////////////////////////////// +// +// Environment macros +// + +#ifdef __cplusplus +#define SYMCRYPT_EXTERN_C extern "C" { +#define SYMCRYPT_EXTERN_C_END } +#else +#define SYMCRYPT_EXTERN_C +#define SYMCRYPT_EXTERN_C_END +#endif + +// +// Callers of SymCrypt should NOT depend on the function names in these macros. +// The definition of these macros can change in future releases of the library. 
+// + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 +typedef struct _SYMCRYPT_EXTENDED_SAVE_DATA SYMCRYPT_EXTENDED_SAVE_DATA, *PSYMCRYPT_EXTENDED_SAVE_DATA; + +#define SYMCRYPT_ENVIRONMENT_DEFS_SAVEYMM( envName ) \ + SYMCRYPT_ERROR SYMCRYPT_CALL SymCryptSaveYmmEnv##envName( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ); \ + SYMCRYPT_ERROR SYMCRYPT_CALL SymCryptSaveYmm( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ) \ + { return SymCryptSaveYmmEnv##envName( pSaveArea ); } \ + \ + VOID SYMCRYPT_CALL SymCryptRestoreYmmEnv##envName( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ); \ + VOID SYMCRYPT_CALL SymCryptRestoreYmm( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ) \ + { SymCryptRestoreYmmEnv##envName( pSaveArea ); } \ + +#define SYMCRYPT_ENVIRONMENT_DEFS_SAVEXMM( envName ) \ + SYMCRYPT_ERROR SYMCRYPT_CALL SymCryptSaveXmmEnv##envName( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ); \ + SYMCRYPT_ERROR SYMCRYPT_CALL SymCryptSaveXmm( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ) \ + { return SymCryptSaveXmmEnv##envName( pSaveArea ); } \ + \ + VOID SYMCRYPT_CALL SymCryptRestoreXmmEnv##envName( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ); \ + VOID SYMCRYPT_CALL SymCryptRestoreXmm( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ) \ + { SymCryptRestoreXmmEnv##envName( pSaveArea ); } \ + + +#else + +#define SYMCRYPT_ENVIRONMENT_DEFS_SAVEYMM( envName ) +#define SYMCRYPT_ENVIRONMENT_DEFS_SAVEXMM( envName ) + +#endif + +// Environment forwarding functions. +// CPUIDEX is only forwarded on CPUs that have it. 
+#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 +#define SYMCRYPT_ENVIRONMENT_FORWARD_CPUIDEX( envName ) \ + VOID SYMCRYPT_CALL SymCryptCpuidExFuncEnv##envName( int cpuInfo[4], int function_id, int subfunction_id ); \ + VOID SYMCRYPT_CALL SymCryptCpuidExFunc( int cpuInfo[4], int function_id, int subfunction_id ) \ + { SymCryptCpuidExFuncEnv##envName( cpuInfo, function_id, subfunction_id ); } +#else +#define SYMCRYPT_ENVIRONMENT_FORWARD_CPUIDEX( envName ) +#endif + +#define SYMCRYPT_ENVIRONMENT_DEFS( envName ) \ +SYMCRYPT_EXTERN_C \ + VOID SYMCRYPT_CALL SymCryptInitEnv##envName( UINT32 version ); \ + VOID SYMCRYPT_CALL SymCryptInit(void) \ + { SymCryptInitEnv##envName( SYMCRYPT_API_VERSION ); } \ + \ + _Analysis_noreturn_ VOID SYMCRYPT_CALL SymCryptFatalEnv##envName( UINT32 fatalCode ); \ + _Analysis_noreturn_ VOID SYMCRYPT_CALL SymCryptFatal( UINT32 fatalCode ) \ + { SymCryptFatalEnv##envName( fatalCode ); } \ + SYMCRYPT_CPU_FEATURES SYMCRYPT_CALL SymCryptCpuFeaturesNeverPresentEnv##envName(void); \ + SYMCRYPT_CPU_FEATURES SYMCRYPT_CALL SymCryptCpuFeaturesNeverPresent(void) \ + { return SymCryptCpuFeaturesNeverPresentEnv##envName(); } \ + \ + SYMCRYPT_ENVIRONMENT_DEFS_SAVEXMM( envName ) \ + SYMCRYPT_ENVIRONMENT_DEFS_SAVEYMM( envName ) \ + \ + VOID SYMCRYPT_CALL SymCryptTestInjectErrorEnv##envName( PBYTE pbBuf, SIZE_T cbBuf ); \ + VOID SYMCRYPT_CALL SymCryptInjectError( PBYTE pbBuf, SIZE_T cbBuf ) \ + { SymCryptTestInjectErrorEnv##envName( pbBuf, cbBuf ); } \ + SYMCRYPT_ENVIRONMENT_FORWARD_CPUIDEX( envName ) \ +SYMCRYPT_EXTERN_C_END + +// +// To avoid hard-to-diagnose mistakes, we skip defining environment macros in those cases where we +// know they cannot or should not be used. 
+// + +#define SYMCRYPT_ENVIRONMENT_GENERIC SYMCRYPT_ENVIRONMENT_DEFS( Generic ) + +#if defined(EFI) | defined(PCAT) | defined(DIRECT) +#define SYMCRYPT_ENVIRONMENT_WINDOWS_BOOTLIBRARY SYMCRYPT_ENVIRONMENT_DEFS( WindowsBootlibrary ) +#endif + +// +// There are no defined symbols that we can use to detect that we are in debugger code +// But this is unlikely to be misused. +// +#define SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELDEBUGGER SYMCRYPT_ENVIRONMENT_DEFS( WindowsKernelDebugger ) + + + +#define SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE_LEGACY SYMCRYPT_ENVIRONMENT_GENERIC + +#ifdef NTDDI_VERSION +#if (NTDDI_VERSION >= NTDDI_WIN7) +#define SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE_WIN7_N_LATER SYMCRYPT_ENVIRONMENT_DEFS( WindowsKernelmodeWin7nLater ) +#endif + +#if (NTDDI_VERSION >= NTDDI_WINBLUE) +#define SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE_WIN8_1_N_LATER SYMCRYPT_ENVIRONMENT_DEFS( WindowsKernelmodeWin8_1nLater ) +#endif + +#define SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE_LATEST SYMCRYPT_ENVIRONMENT_WINDOWS_KERNELMODE_WIN8_1_N_LATER + + + +#define SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_LEGACY SYMCRYPT_ENVIRONMENT_GENERIC + +#if (NTDDI_VERSION >= NTDDI_WIN7) +#define SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_WIN7_N_LATER SYMCRYPT_ENVIRONMENT_DEFS( WindowsUsermodeWin7nLater ) +#endif + +#if (NTDDI_VERSION >= NTDDI_WINBLUE) +#define SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_WIN8_1_N_LATER SYMCRYPT_ENVIRONMENT_DEFS( WindowsUsermodeWin8_1nLater ) +#endif + +#if (NTDDI_VERSION >= NTDDI_WIN10) +#define SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_WIN10_SGX SYMCRYPT_ENVIRONMENT_DEFS( Win10Sgx ) +#endif +#endif // NTDDI_VERSION + +#define SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_LATEST SYMCRYPT_ENVIRONMENT_WINDOWS_USERMODE_WIN8_1_N_LATER + + +#define SYMCRYPT_ENVIRONMENT_POSIX_USERMODE SYMCRYPT_ENVIRONMENT_DEFS( PosixUsermode ) + +// For backwards compatibility with previous macro name +#define SYMCRYPT_ENVIRONMENT_LINUX_USERMODE SYMCRYPT_ENVIRONMENT_POSIX_USERMODE + + +#define 
SYMCRYPT_ENVIRONMENT_OPTEE_TA SYMCRYPT_ENVIRONMENT_DEFS( OpteeTa ) + +////////////////////////////////////////////////////////// +// +// SymCryptWipe & SymCryptWipeKnownSize +// + +VOID +SYMCRYPT_CALL +SymCryptWipe( + _Out_writes_bytes_(cbData) PVOID pbData, + SIZE_T cbData); + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + +// +// If the known size is large we call the generic wipe function anyway. +// For small known sizes we perform the wipe inline. +// This is a tradeoff between speed and code size and there are diminishing returns to supporting +// increasingly large sizes. +// We currently put the limit at ~8 native writes, which varies by platform. +// +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM +#define SYMCRYPT_WIPE_FUNCTION_LIMIT (32) // If this is increased beyond 127 the code below must be updated. +#elif SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 +#define SYMCRYPT_WIPE_FUNCTION_LIMIT (64) // If this is increased beyond 127 the code below must be updated. +#else +#error ?? +#endif + +// +// The buffer analysis code doesn't understand our optimized in-line wiping code +// well enough to conclude it is safe. +// +#pragma prefast(push) +#pragma prefast( disable: 26001 ) + +static +FORCEINLINE +VOID +SYMCRYPT_CALL +#pragma prefast( suppress: 6101, "Logic why this properly initializes the pbData buffer is too complicated for prefast" ) +SymCryptWipeKnownSize(_Out_writes_bytes_(cbData) PVOID pbData, SIZE_T cbData) +{ + volatile BYTE * pb = (volatile BYTE *)pbData; + + if (cbData > SYMCRYPT_WIPE_FUNCTION_LIMIT) + { + SymCryptWipe(pbData, cbData); + } + else + { + // + // We assume that pb is aligned, so we wipe from the end to the front to keep alignment. 
+ // + if (cbData & 1) + { + cbData--; + SYMCRYPT_INTERNAL_FORCE_WRITE8((volatile BYTE *)&pb[cbData], 0); + } + if (cbData & 2) + { + cbData -= 2; + SYMCRYPT_INTERNAL_FORCE_WRITE16((volatile UINT16 *)&pb[cbData], 0); + } + if (cbData & 4) + { + cbData -= 4; + SYMCRYPT_INTERNAL_FORCE_WRITE32((volatile UINT32 *)&pb[cbData], 0); + } + if (cbData & 8) + { + cbData -= 8; + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData], 0); + } + if (cbData & 16) + { + cbData -= 16; + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 8], 0); + } + if (cbData & 32) + { + cbData -= 32; + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 8], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 16], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 24], 0); + } +#if SYMCRYPT_WIPE_FUNCTION_LIMIT >= 64 + if (cbData & 64) + { + cbData -= 64; + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 8], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 16], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 24], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 32], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 40], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 48], 0); + SYMCRYPT_INTERNAL_FORCE_WRITE64((volatile UINT64 *)&pb[cbData + 56], 0); + } +#endif + } +} + +#pragma prefast(pop) + +#else // Platform switch for SymCryptWipeKnownSize + +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptWipeKnownSize(_Out_writes_bytes_(cbData) PVOID pbData, SIZE_T cbData) +{ + SymCryptWipe(pbData, cbData); +} + +#endif // Platform switch for SymCryptWipeKnownSize + +#define SYMCRYPT_FIPS_ASSERT(x) { if(!(x)){ 
SymCryptFatal('FIPS'); } } + +// Flags for FIPS on-demand selftests. When an on-demand selftest succeeds, the corresponding flag +// will be set in g_SymCryptFipsSelftestsPerformed. Other selftests are performed automatically +// when the module is loaded, so they don't have a corresponding flag. +typedef enum _SYMCRYPT_SELFTEST_ALGORITHM { + SYMCRYPT_SELFTEST_ALGORITHM_NONE = 0x0, + SYMCRYPT_SELFTEST_ALGORITHM_STARTUP = 0x1, + SYMCRYPT_SELFTEST_ALGORITHM_DSA = 0x2, + SYMCRYPT_SELFTEST_ALGORITHM_ECDSA = 0x4, + SYMCRYPT_SELFTEST_ALGORITHM_RSA = 0x8, + SYMCRYPT_SELFTEST_ALGORITHM_DH = 0x10, + SYMCRYPT_SELFTEST_ALGORITHM_ECDH = 0x20, + SYMCRYPT_SELFTEST_ALGORITHM_MLKEM = 0x40, + SYMCRYPT_SELFTEST_ALGORITHM_XMSS = 0x80, + SYMCRYPT_SELFTEST_ALGORITHM_LMS = 0x100, + SYMCRYPT_SELFTEST_ALGORITHM_MLDSA = 0x200, +} SYMCRYPT_SELFTEST_ALGORITHM; + +// Takes values which are some bitwise OR combination of SYMCRYPT_SELFTEST_ALGORITHM values +// Specified as UINT32 as we will update with 32 bit atomics, and compilers may choose to make enum +// types smaller than 32 bits. +extern UINT32 g_SymCryptFipsSelftestsPerformed; + +UINT32 +SYMCRYPT_CALL +SymCryptFipsGetSelftestsPerformed(void); +// Returns current value of g_SymCryptFipsSelftestsPerformed so callers may inspect which FIPS +// algorithm selftests have run + +// Flags for per-key selftests. +// When an asymmetric key is generated or imported, and SYMCRYPT_FLAG_KEY_NO_FIPS is not specified, +// some selftests must be performed on the key, before its operational use in an algorithm, to +// comply with FIPS. +// The algorithms the key may be used in will be tracked in the key's fAlgorithmInfo field, as a +// bitwise OR of SYMCRYPT_FLAG_<keytype>_<algorithm> (e.g. SYMCRYPT_FLAG_DLKEY_DH). 
+// This field will also track which per-key selftests have been run on the key using the below flags +// We want to track which selftests have been run independently of which algorithms the key may be +// used in as in some scenarios at key generation / import time we may not know what algorithm the +// key will actually be used in. Tracking the run per-key selftests in fAlgorithmInfo allows us to +// defer running expensive tests until we know they are required (e.g. if we generate an Eckey which +// may be used in ECDH or ECDSA, and only use it for ECDH, the ECDSA PCT is deferred until we first +// attempt to use the key in ECDSA, or export the private key). +// +// For clarity, SYMCRYPT_PCT_* should be used instead of SYMCRYPT_SELFTEST_KEY_* going forward. +// The latter is retained for compatibility with existing code, but may be removed in a future +// breaking change. + +// Dlkey selftest flags +// DSA Pairwise Consistency Test to be run on generated keys +#define SYMCRYPT_SELFTEST_KEY_DSA (0x1) +#define SYMCRYPT_PCT_DSA SYMCRYPT_SELFTEST_KEY_DSA + +// Eckey selftest flags +// ECDSA Pairwise Consistency Test to be run on generated keys +#define SYMCRYPT_SELFTEST_KEY_ECDSA (0x1) +#define SYMCRYPT_PCT_ECDSA SYMCRYPT_SELFTEST_KEY_ECDSA + +// Rsakey selftest flags +// RSA Pairwise Consistency Test to be run on generated keys +#define SYMCRYPT_SELFTEST_KEY_RSA_SIGN (0x1) +#define SYMCRYPT_PCT_RSA_SIGN SYMCRYPT_SELFTEST_KEY_RSA_SIGN + +UINT32 +SYMCRYPT_CALL +SymCryptDeprecatedStatusIndicator(PBYTE pbOutput, UINT32 cbOutput); +// +// Returns the FIPS Approved Services Status Indicator as an ASCII string. +// This API is required to satisfy FIPS 140-3 requirements, but is *not* recommended +// to be used in production code. It should be considered unstable, +// and may be removed at any time. +// +// The output string will be copied to pbOutput if the size of the buffer +// cbOutput is large enough. 
The function returns the required buffer size +// when pbOutput is passed as NULL. If pbOutput is not NULL, the function +// returns the number of bytes copied to pbOutput. +// + + + +typedef enum _SYMCRYPT_SI_TYPE { + + // Algorithm types (specific algorithms are represented as a bitmask of a type) + SYMCRYPT_SI_TYPE_CIPHER = 0x01, + SYMCRYPT_SI_TYPE_HASH = 0x02, + SYMCRYPT_SI_TYPE_MAC = 0x03, + SYMCRYPT_SI_TYPE_KDF = 0x04, + SYMCRYPT_SI_TYPE_DRBG = 0x05, + SYMCRYPT_SI_TYPE_ASYM_ALG = 0x06, + SYMCRYPT_SI_TYPE_KAS = 0x07, + SYMCRYPT_SI_TYPE_KEM = 0x08, + + // Other types where elements are a bitmask + SYMCRYPT_SI_TYPE_ECURVE = 0x40, + SYMCRYPT_SI_TYPE_KAS_SCHEME = 0x41, + SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP = 0x42, + + // Non-bitmask types + SYMCRYPT_SI_TYPE_INTRANGE = 0x80, + SYMCRYPT_SI_TYPE_INTPAIR = 0x81, + SYMCRYPT_SI_TYPE_SIZERANGE = 0x82, + + SYMCRYPT_SI_TYPE_MAX = 0xFF +} SYMCRYPT_SI_TYPE; + +#define SYMCRYPT_SI_CREATE_ID(type, index) (((UINT64)(type) << 56) + (1ULL << (index))) // type tag in the top 8 bits plus a single bit at position index + +#define SYMCRYPT_SI_INTBITS ((64 - 8) / 2) // 8-bits for type, remaining bits shared by two integers +#define SYMCRYPT_SI_INTMASK ((1ULL << SYMCRYPT_SI_INTBITS) - 1) // typically should be 0x0FFFFFFF with 28 1s +#define SYMCRYPT_SI_INTPACK(High, Low) (((((UINT64)(High)) & SYMCRYPT_SI_INTMASK) << SYMCRYPT_SI_INTBITS) | (((UINT64)(Low)) & SYMCRYPT_SI_INTMASK)) // High lands in the upper SYMCRYPT_SI_INTBITS bits, Low in the lower; arguments are parenthesized so expression arguments are cast and masked as a whole +#define SYMCRYPT_SI_INTUNPACKLO(X) ((X) & SYMCRYPT_SI_INTMASK) +#define SYMCRYPT_SI_INTUNPACKHI(X) (((X) >> SYMCRYPT_SI_INTBITS) & SYMCRYPT_SI_INTMASK) + +#define SYMCRYPT_SI_INTRANGE(Low, High) (((UINT64)SYMCRYPT_SI_TYPE_INTRANGE << 56) | SYMCRYPT_SI_INTPACK(High, Low)) +#define SYMCRYPT_SI_INTPAIR(X, Y) (((UINT64)SYMCRYPT_SI_TYPE_INTPAIR << 56) | SYMCRYPT_SI_INTPACK(Y, X)) +#define SYMCRYPT_SI_SIZERANGE(Low, High) (((UINT64)SYMCRYPT_SI_TYPE_SIZERANGE << 56) | SYMCRYPT_SI_INTPACK(High, Low)) + +#define SYMCRYPT_SI_CHECK_INT(L) C_ASSERT((L) <= SYMCRYPT_SI_INTMASK) // asserts that L fits in one SYMCRYPT_SI_INTBITS-wide packed field; (L) is parenthesized so operators in the argument cannot rebind against <= + +#define SYMCRYPT_SI_KEYBITS(L) SYMCRYPT_SI_SIZERANGE(L, L) 
+#define SYMCRYPT_SI_MODULUS(L) SYMCRYPT_SI_SIZERANGE(L, L) +#define SYMCRYPT_SI_DSAPARAMS(N, L) SYMCRYPT_SI_INTPAIR(N, L) + + +// Services +#define SYMCRYPT_SI_SVC_ENCRYPTION 0x00000001 +#define SYMCRYPT_SI_SVC_DECRYPTION 0x00000002 +#define SYMCRYPT_SI_SVC_HASHING 0x00000004 +#define SYMCRYPT_SI_SVC_MESSAGE_AUTHENTICATION 0x00000008 +#define SYMCRYPT_SI_SVC_KEY_DERIVATION 0x00000010 +#define SYMCRYPT_SI_SVC_ASYMMETRIC_KEY_GENERATION 0x00000020 +#define SYMCRYPT_SI_SVC_ASYMMETRIC_KEY_VERIFICATION 0x00000080 +#define SYMCRYPT_SI_SVC_RANDOM_NUMBER_GENERATION 0x00000400 +#define SYMCRYPT_SI_SVC_SECRET_AGREEMENT 0x00000800 +#define SYMCRYPT_SI_SVC_SIGNATURE_GENERATION 0x00001000 +#define SYMCRYPT_SI_SVC_SIGNATURE_VERIFICATION 0x00002000 +#define SYMCRYPT_SI_SVC_KEY_ENCAPSULATION 0x00004000 +#define SYMCRYPT_SI_SVC_KEY_DECAPSULATION 0x00008000 + +// Ciphers +#define SYMCRYPT_SI_AES_CBC SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 0) +#define SYMCRYPT_SI_AES_CCM SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 1) +#define SYMCRYPT_SI_AES_CFB128 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 2) +#define SYMCRYPT_SI_AES_CFB8 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 3) +#define SYMCRYPT_SI_AES_CTR SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 4) +#define SYMCRYPT_SI_AES_ECB SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 5) +#define SYMCRYPT_SI_AES_GCM SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 6) +#define SYMCRYPT_SI_AES_XTS SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 7) +#define SYMCRYPT_SI_RC2 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 8) +#define SYMCRYPT_SI_RC4 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 9) +#define SYMCRYPT_SI_CHACHA SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 10) +#define SYMCRYPT_SI_DES SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 11) +#define SYMCRYPT_SI_TRIPLEDES SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 12) +#define SYMCRYPT_SI_CHACHA20 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 13) +#define 
SYMCRYPT_SI_CHACHA20_POLY1305 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 14) +#define SYMCRYPT_SI_AES_KW SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 15) +#define SYMCRYPT_SI_AES_KWP SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_CIPHER, 16) + +// Hash Functions +#define SYMCRYPT_SI_MD2 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 0) +#define SYMCRYPT_SI_MD4 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 1) +#define SYMCRYPT_SI_MD5 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 2) +#define SYMCRYPT_SI_SHA1 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 3) +#define SYMCRYPT_SI_SHA2_224 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 4) +#define SYMCRYPT_SI_SHA2_256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 5) +#define SYMCRYPT_SI_SHA2_384 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 6) +#define SYMCRYPT_SI_SHA2_512 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 7) +#define SYMCRYPT_SI_SHA2_512_224 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 8) +#define SYMCRYPT_SI_SHA2_512_256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 9) +#define SYMCRYPT_SI_SHA3_224 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 10) +#define SYMCRYPT_SI_SHA3_256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 11) +#define SYMCRYPT_SI_SHA3_384 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 12) +#define SYMCRYPT_SI_SHA3_512 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 13) +#define SYMCRYPT_SI_SHAKE128 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 14) +#define SYMCRYPT_SI_SHAKE256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 15) +#define SYMCRYPT_SI_CSHAKE128 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 16) +#define SYMCRYPT_SI_CSHAKE256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 17) +#define SYMCRYPT_SI_MARVIN32 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_HASH, 18) + +// MAC +#define SYMCRYPT_SI_HMAC_MD2 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 0) +#define SYMCRYPT_SI_HMAC_MD4 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 1) +#define SYMCRYPT_SI_HMAC_MD5 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 2) +#define 
SYMCRYPT_SI_HMAC_SHA1 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 3) +#define SYMCRYPT_SI_HMAC_SHA2_224 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 4) +#define SYMCRYPT_SI_HMAC_SHA2_256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 5) +#define SYMCRYPT_SI_HMAC_SHA2_384 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 6) +#define SYMCRYPT_SI_HMAC_SHA2_512 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 7) +#define SYMCRYPT_SI_HMAC_SHA2_512_224 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 8) +#define SYMCRYPT_SI_HMAC_SHA2_512_256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 9) +#define SYMCRYPT_SI_HMAC_SHA3_224 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 10) +#define SYMCRYPT_SI_HMAC_SHA3_256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 11) +#define SYMCRYPT_SI_HMAC_SHA3_384 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 12) +#define SYMCRYPT_SI_HMAC_SHA3_512 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 13) +#define SYMCRYPT_SI_KMAC128 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 14) +#define SYMCRYPT_SI_KMAC256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 15) +#define SYMCRYPT_SI_AES_GMAC SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 16) +#define SYMCRYPT_SI_AES_CMAC SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 17) +#define SYMCRYPT_SI_AES_CBCMAC SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 18) +#define SYMCRYPT_SI_POLY1305 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_MAC, 19) + +// KDF +#define SYMCRYPT_SI_HKDF SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 0) +#define SYMCRYPT_SI_PBKDF SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 1) +#define SYMCRYPT_SI_KDA_ONESTEP SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 2) +#define SYMCRYPT_SI_KDF_IKEV1 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 3) +#define SYMCRYPT_SI_KDF_IKEV2 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 4) +#define SYMCRYPT_SI_KDF_SP800_108_CTR SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 5) +#define SYMCRYPT_SI_KDF_SRTP SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 6) +#define SYMCRYPT_SI_KDF_SSH SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 
7) +#define SYMCRYPT_SI_KDF_TLS SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 8) +#define SYMCRYPT_SI_KDF_TLS_V12 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KDF, 9) + +// DRBG +#define SYMCRYPT_SI_CTR_DRBG_AES256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_DRBG, 0) + +// Asymmetric Algorithms +#define SYMCRYPT_SI_SAFE_PRIME_KEYGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 0) +#define SYMCRYPT_SI_DSA_KEYGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 1) +#define SYMCRYPT_SI_DSA_PQGGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 2) +#define SYMCRYPT_SI_DSA_PQGVER SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 3) +#define SYMCRYPT_SI_DSA_SIGVER SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 4) + +#define SYMCRYPT_SI_ECDSA_KEYGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 5) +#define SYMCRYPT_SI_ECDSA_KEYVER SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 6) +#define SYMCRYPT_SI_ECDSA_SIGGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 7) +#define SYMCRYPT_SI_ECDSA_SIGVER SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 8) +#define SYMCRYPT_SI_ECDSA_SIGGEN_COMP SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 9) + +#define SYMCRYPT_SI_RSA_KEYGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 10) +#define SYMCRYPT_SI_RSA_DEC_PRIM SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 12) +#define SYMCRYPT_SI_RSA_SIG_PRIM SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 13) +#define SYMCRYPT_SI_RSA_SIGGEN_PKCS15 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 14) +#define SYMCRYPT_SI_RSA_SIGGEN_PKCSPSS SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 15) +#define SYMCRYPT_SI_RSA_SIGVER_PKCS15 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 16) +#define SYMCRYPT_SI_RSA_SIGVER_PKCSPSS SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 17) + +#define SYMCRYPT_SI_KAS_ECC SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 18) +#define SYMCRYPT_SI_KAS_ECC_SSC SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 19) +#define SYMCRYPT_SI_KAS_FFC 
SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 20) +#define SYMCRYPT_SI_KAS_FFC_SSC SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 21) + +// PQ Algorithms + +// Asym Alg IDs for PQC algorithms in range 22-26 are replaced with more granular +// algorithms as below. +// Keeping this range reserved until there's a need to use it in the future. + +#define SYMCRYPT_SI_MLDSA_KEYGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 27) +#define SYMCRYPT_SI_MLDSA_SIGGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 28) +#define SYMCRYPT_SI_MLDSA_SIGVER SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 29) +#define SYMCRYPT_SI_LMS_KEYGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 30) +#define SYMCRYPT_SI_LMS_SIGGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 31) +#define SYMCRYPT_SI_LMS_SIGVER SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 32) +#define SYMCRYPT_SI_XMSS_KEYGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 33) +#define SYMCRYPT_SI_XMSS_SIGGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 34) +#define SYMCRYPT_SI_XMSS_SIGVER SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 35) +#define SYMCRYPT_SI_XMSS_MT_KEYGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 36) +#define SYMCRYPT_SI_XMSS_MT_SIGGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 37) +#define SYMCRYPT_SI_XMSS_MT_SIGVER SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ASYM_ALG, 38) + +#define SYMCRYPT_SI_MLKEM_KEYGEN SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KEM, 0) +#define SYMCRYPT_SI_MLKEM_ENCAPS SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KEM, 1) +#define SYMCRYPT_SI_MLKEM_DECAPS SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KEM, 2) + + +// Elliptic Curves +#define SYMCRYPT_SI_ECURVE_NISTP192 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ECURVE, 0) +#define SYMCRYPT_SI_ECURVE_NISTP224 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ECURVE, 1) +#define SYMCRYPT_SI_ECURVE_NISTP256 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ECURVE, 2) +#define SYMCRYPT_SI_ECURVE_NISTP384 
SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ECURVE, 3) +#define SYMCRYPT_SI_ECURVE_NISTP521 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ECURVE, 4) +#define SYMCRYPT_SI_ECURVE_NUMSP256T1 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ECURVE, 5) +#define SYMCRYPT_SI_ECURVE_NUMSP384T1 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ECURVE, 6) +#define SYMCRYPT_SI_ECURVE_NUMSP512T1 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ECURVE, 7) +#define SYMCRYPT_SI_ECURVE_CURVE25519 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_ECURVE, 8) + +// Safe Prime Groups +#define SYMCRYPT_SI_SPG_FFDHE_2048 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 0) +#define SYMCRYPT_SI_SPG_FFDHE_3072 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 1) +#define SYMCRYPT_SI_SPG_FFDHE_4096 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 2) +#define SYMCRYPT_SI_SPG_FFDHE_6144 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 3) +#define SYMCRYPT_SI_SPG_FFDHE_8192 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 4) +#define SYMCRYPT_SI_SPG_MODP_2048 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 5) +#define SYMCRYPT_SI_SPG_MODP_3072 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 6) +#define SYMCRYPT_SI_SPG_MODP_4096 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 7) +#define SYMCRYPT_SI_SPG_MODP_6144 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 8) +#define SYMCRYPT_SI_SPG_MODP_8192 SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_SAFE_PRIME_GROUP, 9) + +// KAS Schemes +#define SYMCRYPT_SI_SCHEME_EPHEM_UNIFIED SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KAS_SCHEME, 0) +#define SYMCRYPT_SI_SCHEME_DH_EPHEM SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KAS_SCHEME, 1) +#define SYMCRYPT_SI_SCHEME_DH_ONEFLOW SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KAS_SCHEME, 2) +#define SYMCRYPT_SI_SCHEME_DH_STATIC SYMCRYPT_SI_CREATE_ID(SYMCRYPT_SI_TYPE_KAS_SCHEME, 3) + + +UINT32 +SYMCRYPT_CALL +SymCryptDeprecatedServiceIndicator( + UINT32 Service, + UINT64 Alg, + UINT64 Param1, + UINT64 Param2, + 
UINT64 Param3); +// +// Returns FIPS 140 Approved Services Indicator for an algorithm. +// +// Parameters: +// - Service. Service identifier, one of SYMCRYPT_SI_SVC_XXX. +// - Alg. Identifier of the algorithm for which the status is being queried. This must be +// exactly one of the algorithm identifiers defined above. +// - Param1, Param2, Param3. Depending on the Alg parameter, these parameters provide +// additional information about the capabilities and parameters associated with an +// algorithm. For each algorithm, the number and type of the parameters must be provided +// as specified below. Any unused parameters must be passed as 0. The algorithms that require +// parameters to be specified are listed below, the remaining algorithms do not have any parameters. +// +// Alg Id Param1 Param2 +// ----------------------------- -------------------------------- --------------- +// SYMCRYPT_SI_AES_XTS SYMCRYPT_SI_KEYBITS(int) - +// SYMCRYPT_SI_DSA_PQGVER SYMCRYPT_SI_DSAPARAMS(int, int) - +// SYMCRYPT_SI_DSA_SIGVER SYMCRYPT_SI_DSAPARAMS(int, int) - +// SYMCRYPT_SI_ECDSA_KEYGEN SYMCRYPT_SI_ECURVE_XXX - +// SYMCRYPT_SI_ECDSA_KEYVER SYMCRYPT_SI_ECURVE_XXX - +// SYMCRYPT_SI_ECDSA_SIGGEN SYMCRYPT_SI_ECURVE_XXX Hash Alg Id +// SYMCRYPT_SI_ECDSA_SIGGEN_COMP SYMCRYPT_SI_ECURVE_XXX Hash Alg Id +// SYMCRYPT_SI_ECDSA_SIGVER SYMCRYPT_SI_ECURVE_XXX Hash Alg Id +// SYMCRYPT_SI_RSA_DEC_PRIM SYMCRYPT_SI_MODULUS(int) - +// SYMCRYPT_SI_RSA_KEYGEN SYMCRYPT_SI_MODULUS(int) - +// SYMCRYPT_SI_RSA_SIGGEN_PKCS15 SYMCRYPT_SI_MODULUS(int) Hash Alg Id +// SYMCRYPT_SI_RSA_SIGVER_PKCS15 SYMCRYPT_SI_MODULUS(int) Hash Alg Id +// SYMCRYPT_SI_RSA_SIGGEN_PKCSPSS SYMCRYPT_SI_MODULUS(int) Hash Alg Id +// SYMCRYPT_SI_RSA_SIGVER_PKCSPSS SYMCRYPT_SI_MODULUS(int) Hash Alg Id +// SYMCRYPT_SI_SAFE_PRIME_KEYGEN SYMCRYPT_SI_SPG_XXX Hash Alg Id +// SYMCRYPT_SI_HMAC_XXX SYMCRYPT_SI_KEYBITS(int) - +// SYMCRYPT_SI_KDA_ONESTEP Hash Alg Id or MAC alg Id - +// SYMCRYPT_SI_PBKDF MAC Alg Id - +// 
SYMCRYPT_SI_KDF_SP800_108_CTR MAC Alg Id - +// SYMCRYPT_SI_KDF_SSH Hash Alg Id - +// SYMCRYPT_SI_KDF_TLS_V12 Hash Alg Id - +// SYMCRYPT_SI_KAS_ECC SYMCRYPT_SI_ECURVE_XXX Hash Alg Id +// SYMCRYPT_SI_KAS_ECC_SSC SYMCRYPT_SI_ECURVE_XXX SYMCRYPT_SI_SCHEME_XXX +// SYMCRYPT_SI_KAS_FFC SYMCRYPT_SI_SPG_XXX Hash Alg Id +// SYMCRYPT_SI_KAS_FFC_SSC SYMCRYPT_SI_SPG_XXX SYMCRYPT_SI_SCHEME_XXX +// SYMCRYPT_SI_LMS_SIGVER SYMCRYPT_LMS_XXX - +// SYMCRYPT_SI_XMSS_SIGVER SYMCRYPT_XMSS_XXX - +// SYMCRYPT_SI_XMSS_MT_SIGVER SYMCRYPT_XMSSMT_XXX - +// +// +// Return value: +// For the specified service and algorithm (and parameters if any), the function +// returns 0 if SymCrypt implements the algorithm in an approved manner. A non-zero +// value indicates either the algorithm is non-approved or the parameters were invalid. +// +// Remarks: +// - For parameters that contain integer values, the callers must ensure that the values +// are within the acceptable limits by using the SYMCRYPT_SI_CHECK_INT(L) macro. diff --git a/libs/symcrypt/inc/symcrypt_internal_shared.inc b/libs/symcrypt/inc/symcrypt_internal_shared.inc new file mode 100644 index 00000000000..03eae5bf7c0 --- /dev/null +++ b/libs/symcrypt/inc/symcrypt_internal_shared.inc @@ -0,0 +1,33 @@ +// +// symcrypt_internal_shared.inc +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// This is the file that contains the SymCrypt version information and defines SYMCRYPT_DEBUG. +// It is included in both C and ASM such that the values are the same on both sides. +// We use the C preprocessor to set ASM constants, as we already need to use the C preprocessor for +// symcryptasm processing (see scripts/symcryptasm_processor.py). +// +// In previous releases we had a numbering system with major/minor version number. +// This worked well with the sequential servicing imposed by SourceDepot. +// With the switch to Git this no longer works due to having multiple branches. 
+// We move to having the version here only specify the API and minor version number +// These will NOT be changed for every build. The API version only changes when there are +// breaking changes to the API in symcrypt.h. (Note: symcrypt_low_level.h is not stable and can change +// at any time.) The minor version is changed at regular intervals, but not necessarily at +// every build of the library. +// +// Separate from these numbers the build system includes information about the branch, +// last commit, build time, etc. +// +// The API numbering starts at 100 to avoid number conflicts with the old system. +// + +#define SYMCRYPT_CODE_VERSION_API 103 +#define SYMCRYPT_CODE_VERSION_MINOR 11 +#define SYMCRYPT_CODE_VERSION_PATCH 0 + +#if defined(DBG) +#define SYMCRYPT_DEBUG 1 +#else +#define SYMCRYPT_DEBUG 0 +#endif diff --git a/libs/symcrypt/inc/symcrypt_low_level.h b/libs/symcrypt/inc/symcrypt_low_level.h new file mode 100644 index 00000000000..296fbd48bf5 --- /dev/null +++ b/libs/symcrypt/inc/symcrypt_low_level.h @@ -0,0 +1,3137 @@ +// +// SymCrypt_low_level.h +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#pragma once + + +#ifdef __cplusplus +extern "C" { +#endif + +//======================================================================================= +// WARNING: The low-level APIs are not stable, and can change from release to release. +// The low-level APIs are only provided for certain exceptional use cases. +// All aspects of the low-level API can change in any release. +// Users are strongly advised to only rely on the API surface defined in symcrypt.h +//======================================================================================= + + +// +// Low level asymmetric algorithm API. This is not to be used by external callers. 
+// + +/************************************************************************************************** + Low-level Integer API + ************************************************************************************************** +The low-level API allows manipulation of arbitrarily large integers. + +The internal representation of large integers is not fixed. It depends on CPU architecture and +on the CPU features available on the exact CPU stepping the current software is running on. +In other words, it can change between different executions of the same binary. +Therefore it is critical that callers refrain from making assumptions about the internal +data format used. SymCrypt numbers should only be manipulated through the SymCrypt +API. + +The low-level API allows the caller to allocate the necessary memory for all objects. +This is typically necessary for high-IRQL level callers, callers running in low-memory +environments, and high-performance scenarios where memory has to be pre-allocated. +SymCrypt also provides routines for allocating objects, which makes the API easier +to use. The caller has to provide the allocation functions that SymCrypt uses. + +Internal data representation, and consequently the size of objects, can depend on the +exact CPU stepping the code is running on. +For robustness, the allocation size requirements are compile-time properties; +they vary per CPU architecture but do not depend on the exact available CPU features. + +General rules: +The functions in the low-level API can impose requirements on their inputs. It is imperative that +these requirements are satisfied for every call; failing to satisfy the requirements leads to undefined +behaviour, including bugchecks, access violations, wrong results, or sometimes even the right result. 
+CHKed versions of the library add more low-level consistency checks; all binaries should be tested +with a CHKed version of the library to detect any errors that might go unnoticed on FRE versions. + +Scratch space: +Many functions in the API require temporary storage for intermediate results. +Some functions simply allocate the necessary memory using the caller-provided allocation routines. +Other low-level functions are so fast that the overhead of the allocations would significantly slow +down the computations. These functions require the caller to allocate the memory; this memory +is called the scratch space. + +For each function that requires scratch space, there is a macro that determines how much scratch space +must be provided. This macro is a compile-time function of its arguments; if the parameters to the macro +are compile-time constants, then the result is also a compile-time constant. Therefore +the macro can be used for statically sizing arrays. +The scratch space macros are all non-decreasing in each argument. +Callers that perform multiple operations can use a single scratch space, sized for the largest +argument(s) used. Note that the SYMCRYPT_C_MAX macro implements a compile-time MAX function suitable +for combining different scratch space sizes at compile time. + +The scratch space is always passed as a pair of arguments: (pbScratch, cbScratch). +cbScratch needs to be at least as large as the macro definition requires, but may be larger. + +Functions that take scratch parameters do not require memory allocation, and will not fail due to +low-memory conditions. +All functions that use memory allocation will return an error indication if a necessary memory +allocation fails. These functions return an error code, or an object pointer which will be NULL if +the allocation fails. +Functions that do not return an error code or object pointer do not use memory allocation. 
+ +SymCrypt uses several implementation techniques to minimize the cost of the scratch space parameters. +This is necessary because the cost of the parameter passing by itself is significant in scenarios +such as elliptic-curve operations. +In a FRE build, some functions will ignore the cbScratch parameter and simply assume they get enough space; +in this case the SymCrypt may provide an inline-able function that allow the compiler to optimize the cbScratch parameter +away, completely removing it from the actual code. +In environments where some functions don't need any scratch space, similar optimizations are possible for the +pbScratch parameter. + +Scratch and object buffers must all be aligned to SYMCRYPT_ALIGN. + +*/ + +// +// General flags +// +// SYMCRYPT_FLAG_DATA_PUBLIC is used to signal that the data being processed is public, and does not have +// to be protected from side-channel attacks. +#define SYMCRYPT_FLAG_DATA_PUBLIC (0x01) + +/* +INTEGERS + +Integers are internally represented as a sequence of Digits. An INT object with n digits can store +numbers up to (but not including) R^n where R is the _radix_ of the representation. + +The radix R, as well as the size and format of a Digit, are internal to the library, +and can depend on CPU architecture, CPU stepping and other run-time decisions. Therefore, callers +need to be especially careful not to make any assumptions about the size of a digit, or the number +of digits needed for any particular computation. + +At the same time, most INT operations are defined in terms of Digit sizes, so the caller has to +be aware of digits. This becomes important in the following example. Suppose the radix R =2^256, +and a caller wants to multiply two 384-bit numbers. It takes 2 digits to store a 384-bit number. +The caller knows that the product is 768 bits, which can fit in 3 digits. So the caller might try +to multiply two 2-digit numbers into a 3-digit result, which will not work as the result is 4 digits. 
+ +For an INT object of n digits we call the value R^n the capacity of the object. It is the upper bound of +the values that can be stored in the object. + +Additionally, there is a maximum number of bits for any integer value that the library supports (2^20 bits +in the current version). This bound is used to ensure that no object sizes and scratch space computations +have a value of magnitude more than 32 bits. Note that the computed upper bounds are very loose and the +actual values are much smaller. + +Attempts to create objects larger than this bound will result in NULL being returned. Callers either have +to ensure they do not exceed the bounds, or check that created objects are not NULL before using them. The +rationale behind this approach is to avoid any potential route for malicious inputs to trigger DoS by +taking excessive CPU time which would be indistinguishable from an application hang. + + +Digit size and radix can vary widely; on some CPU steppings the library might use a digit that contains +128 bits and requires 16 bytes of memory, on another CPU stepping it might use a digit that contains +416 bits and uses 64 bytes of memory. + +SAL annotations: +Because the different run-time selected implementations underneath this API might use +different size memory buffers for any one operation, fully accurate SAL annotations are not possible as SAL only +performs static analysis. +Furthermore, adding size parameters to every function would add too much overhead, and sizes are often passed +implicitly. Together with the fact that the same API can be implemented by different implementations, this means +that it isn't possible to write the actual size used in a form that SAL can understand. +Instead we use the following conventions: +- Pointers to SYMCRYPT_* objects can only be created with functions that provide the right memory buffer size. +- We annotate each object-pointer with _In_ or _Out_. 
The SAL engine treats this as just a read/write to + a single object at the pointer location +- The CHKed version of SymCrypt adds run-time checking that the various size parameters are correct. +This allows us to have both high performance and good checking of our memory management. + +API rationale: +One important choice in this API is whether to pass a (ptr,len) for each INT or just a pointer. +We investigated this issue. The ptr-based API means that there are fewer parameters to pass around, +and generally makes the API simpler. The downside of a ptr-based API is that each INT object has some overhead +and this makes arrays of large integers less efficient, especially since the overhead can be a whole alignment +block. +The problem with the (ptr,len) format is that it isn't clear what length measure to use. +Using the bitsize is inefficient; the internal format might store 29 bits of the number in each 32-bit word, +and that means that the code would have to divide the bitsize by 29 just to find the size of the number. +Division is slow, and therefore this is not a good choice. +Another idea is to have the len parameter be the length of the INT object, in bytes. +But some APIs get really messy. For example, we need an API function to do a multiplication of two same-sized +numbers into a double-sized number. This is such a common operation that we want a separate function for it. +But the storage size of the result might not be twice the storage size of the inputs; if each number has some +fixed overhead then the output object might be smaller than two times the size of the input objects. +This makes it impossible to write suitable SAL annotations. +For this reason, we use a ptr-based API for integers. + +Most crypto algorithms that wish to store arrays of values actually want to store arrays of elements in a +ring modulo a modulus. 
And for modular operations the caller is already passing the modulus separately, so +there isn't any need to store per-object size information. The API is designed to allow the ModElement for bitsize +B to be smaller than an INT for bitsize B so that implementations can choose to not store any length information in +a ModElement object. +*/ + +//======================================================================== +//======================================================================== +// Main schema for object creation, deletion, and management (low - level calls). +// +// The following are descriptions of some of the generic functions specifically +// modified for the INT, DIVISOR, MODULUS, and MODELEMENT objects. + +// +// PSYMCRYPT_XXX +// SYMCRYPT_CALL +// SymCryptXxxCreate( +// _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, +// SIZE_T cbBuffer, +// UINT32 nDigits ); +// Create an XXX object from the provided (pbBuffer, cbBuffer) space. +// The object will be able to store values up to R^nDigits where R is the digit radix. +// Requirement: +// - 1 <= nDigits <= SymCryptDigitsFromBits(SYMCRYPT_INT_MAX_BITS) +// If the value is outside these bounds it will return NULL +// - cbBuffer >= SymCryptSizeofXxxFromDigits( nDigits ) +// - (pbBuffer,cbBuffer) memory must be exclusively used by this object. +// The last requirement ensures that all objects are non-overlapping (except for API functions +// that explicitly create overlapping objects). +// All parameters are published. +// It is always safe to choose +// cbBuffer = SYMCRYPT_SIZEOF_XXX_FROM_BITS( nBits ) +// nDigits = SymCryptDigitsFromBits( nBits ) +// if the caller wants to be able to store numbers up to 2^nBits. However, it is frequently more +// efficient to use cbBuffer = SymCryptSizeofXxxFromDigits( nDigits ) as that gives the exact size for the +// current CPU stepping rather than the compile-time largest size that might be needed on any stepping. 
+// +// PSYMCRYPT_XXX +// SYMCRYPT_CALL +// SymCryptXxxRetrieveHandle( _In_ PBYTE pbBuffer ); +// Retrieve the object's handle from the pointer to the memory space in which the object was created via +// a call to SymCryptXxxCreate. This function allows callers to tightly store arrays of objects without having +// to keep track of each object handle. +// Requirement: +// - A call to SymCryptXxxRetrieveHandle( pbBuffer1 ) must be preceded by at least one call to +// SymCryptXxxCreate( pbBuffer2, cbBuffer2, nDigits ) with ( pbBuffer1 == pbBuffer2 ) +// If the requirement is not satisfied the result is undefined. +// +// #define SYMCRYPT_SIZEOF_XXX_FROM_BITS( nBits ) ... +// Returns a memory size that is always sufficient to create an XXX object that can handle +// values of size nBits bits, irrespective of the run-time decision of digit size. +// This is a non-decreasing compile-time function of its inputs, suitable for computing static memory allocations. +// It is always true that +// SYMCRYPT_SIZEOF_XXX_FROM_BITS( n ) >= SymCryptSizeofXxxFromDigits( SymCryptDigitsFromBits( n ) ) +// which guarantees that the n-bit XXX can be stored in a memory area of SYMCRYPT_SIZEOF_XXX_FROM_BITS(n) bytes. +// Warning: It is possible that +// SYMCRYPT_SIZEOF_XXX_FROM_BITS( n+m ) < SymCryptSizeofXxxFromDigits( SymCryptDigitsFromBits( n ) + SymCryptDigitsFromBits( m ) ) +// for some inputs n and m. This is easy to see if you choose n = m = 1; each represents a 1-digit value, but an n+m bit (i.e. a 2-bit ) value is +// also 1 digit. +// In particular, you cannot use SYMCRYPT_SIZEOF_XXX_FROM_BITS( n + m ) to compute the size +// necessary to store the product of two numbers with bitsize n and m respectively. 
+// It is guaranteed that +// SymCryptSizeofXxxFromDigits( SymCryptDigitsFromBits( n ) + SymCryptDigitsFromBits( m ) ) <= +// SYMCRYPT_SIZEOF_XXX_FROM_BITS( n ) + SYMCRYPT_SIZEOF_XXX_FROM_BITS( m ) +// This is the proper way to statically compute the size needed to store the product of an n- and m-bit value. +// +// UINT32 +// SYMCRYPT_CALL +// SymCryptSizeofXxxFromDigits( UINT32 nDigits ); +// Memory size that is sufficient to store an XXX object with nDigits digits. +// This is a runtime function as the # digits and size of a digit are run-time decisions that depend on the CPU stepping. +// Requirement: +// - 1 <= nDigits <= SymCryptDigitsFromBits(SYMCRYPT_INT_MAX_BITS) +// If the value is outside these bounds the returned value will be 0 indicating failure. +// This function has the following property: +// SymCryptSizeofXxxFromDigits( a + b ) <= SymCryptSizeofXxxFromDigits( a ) + SymCryptSizeofXxxFromDigits( b ) +// for all a and b. +// +// UINT32 +// SYMCRYPT_CALL +// SymCryptXxxBitsizeOfObject( PCSYMCRYPT_XXX pObj ) +// Return the number of bits of the object. +// +// UINT32 +// SYMCRYPT_CALL +// SymCryptXxxDigitsizeOfObject( PCSYMCRYPT_XXX pObj ) +// Return the number of digits of the object. +// + +//============================================================================================== +// Object types for low-level API +// +// SYMCRYPT_INT integer in range 0..N for some N +// SYMCRYPT_DIVISOR an integer > 0 that can be used to divide with. +// SYMCRYPT_MODULUS a value M > 1 to use in modulo-M computations +// SYMCRYPT_MODELEMENT An element in a modulo-M ring. +// SYMCRYPT_ECPOINT A point on an elliptic curve. +// +// See symcrypt_internal.h for definitions. 
+// + +//======================================================================== +//======================================================================== +// General functions for integers +// + +UINT32 +SymCryptDigitsFromBits( UINT32 nBits ); +// +// Returns the # digits needed to store values (INT, DIVISOR, MODULUS, MODELEMENT) +// in the range 0..(2^nBits - 1). +// +// Remarks: +// If nBits==0 the returned number is 1. +// +// If nBits exceeds SYMCRYPT_INT_MAX_BITS the function will return 0 to indicate an object with +// this many bits is not supported. +// +// This is a run-time decision; the return value can depend on the exact CPU stepping +// the program is running on, or run-time configurations. +// For a and b in the range 0..SYMCRYPT_INT_MAX_BITS, it is always true that +// SymCryptDigitsFromBits( a + b ) <= SymCryptDigitsFromBits( a ) + SymCryptDigitsFromBits( b ) +// + +//======================================================================== +// INT objects +// + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptIntAllocate( UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptIntFree( _Out_ PSYMCRYPT_INT piObj ); + +#define SYMCRYPT_SIZEOF_INT_FROM_BITS( _bitsize ) SYMCRYPT_INTERNAL_SIZEOF_INT_FROM_BITS( _bitsize ) + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofIntFromDigits( UINT32 nDigits ); + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptIntCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptIntWipe( _Out_ PSYMCRYPT_INT piObj ); + +VOID +SYMCRYPT_CALL +SymCryptIntCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ); // **** Documentation lacking: requires same size + +VOID +SYMCRYPT_CALL +SymCryptIntMaskedCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Inout_ PSYMCRYPT_INT piDst, + UINT32 mask ); + +VOID +SYMCRYPT_CALL +SymCryptIntConditionalCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Inout_ PSYMCRYPT_INT piDst, + UINT32 cond ); + +VOID +SYMCRYPT_CALL +SymCryptIntConditionalSwap( + _Inout_ 
PSYMCRYPT_INT piSrc1, + _Inout_ PSYMCRYPT_INT piSrc2, + UINT32 cond ); + +UINT32 +SYMCRYPT_CALL +SymCryptIntBitsizeOfObject( _In_ PCSYMCRYPT_INT piSrc ); + +UINT32 +SYMCRYPT_CALL +SymCryptIntDigitsizeOfObject( _In_ PCSYMCRYPT_INT piSrc ); + +//======================================================================== +// DIVISOR objects +// + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptDivisorAllocate( UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptDivisorFree( _Out_ PSYMCRYPT_DIVISOR pdObj ); + +#define SYMCRYPT_SIZEOF_DIVISOR_FROM_BITS( _bitsize ) SYMCRYPT_INTERNAL_SIZEOF_DIVISOR_FROM_BITS( _bitsize ) + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofDivisorFromDigits( UINT32 nDigits ); + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptDivisorCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptDivisorWipe( _Out_ PSYMCRYPT_DIVISOR pdObj ); + +VOID +SymCryptDivisorCopy( + _In_ PCSYMCRYPT_DIVISOR pdSrc, + _Out_ PSYMCRYPT_DIVISOR pdDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptDivisorDigitsizeOfObject( _In_ PCSYMCRYPT_DIVISOR pdSrc ); + +//======================================================================== +// MODULUS objects +// + +PSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptModulusAllocate( UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptModulusFree( _Out_ PSYMCRYPT_MODULUS pmObj ); + +#define SYMCRYPT_SIZEOF_MODULUS_FROM_BITS( _bitsize ) SYMCRYPT_INTERNAL_SIZEOF_MODULUS_FROM_BITS( _bitsize ) + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofModulusFromDigits( UINT32 nDigits ); + +PSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptModulusCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptModulusWipe( _Out_ PSYMCRYPT_MODULUS pmObj ); + +VOID +SymCryptModulusCopy( + _In_ PCSYMCRYPT_MODULUS pmSrc, + _Out_ PSYMCRYPT_MODULUS pmDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptModulusDigitsizeOfObject( _In_ PCSYMCRYPT_MODULUS pmSrc ); + 
+//======================================================================== +// MODELEMENT objects are treated slightly differently because it does not store its own size. +// This allows a MODELEMENT to be more compact which makes large arrays of ModElements more efficient +// and avoids checking that ModElements have the same size. +// All operations require a modulus to be passed. +// + +PSYMCRYPT_MODELEMENT +SYMCRYPT_CALL +SymCryptModElementAllocate( _In_ PCSYMCRYPT_MODULUS pmMod ); + +VOID +SYMCRYPT_CALL +SymCryptModElementFree( + _In_ PCSYMCRYPT_MODULUS pmMod, // only used to determine the digit size of peObj. + _Out_ PSYMCRYPT_MODELEMENT peObj ); + +#define SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( _bitsize ) SYMCRYPT_INTERNAL_SIZEOF_MODELEMENT_FROM_BITS( _bitsize ) + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofModElementFromModulus( PCSYMCRYPT_MODULUS pmMod ); + +PSYMCRYPT_MODELEMENT +SYMCRYPT_CALL +SymCryptModElementCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _In_ PCSYMCRYPT_MODULUS pmMod ); + +VOID +SYMCRYPT_CALL +SymCryptModElementWipe( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SymCryptModElementCopy( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SymCryptModElementMaskedCopy( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 mask ); + +VOID +SymCryptModElementConditionalSwap( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peData1, + _Inout_ PSYMCRYPT_MODELEMENT peData2, + _In_ UINT32 cond ); + +//======================================================================== +// ECURVE objects + +BOOLEAN +SYMCRYPT_CALL +SymCryptEcurveBufferSizesFromParams( + _In_ PCSYMCRYPT_ECURVE_PARAMS pParams, + _Out_ SIZE_T * pcbCurve, + _Out_ SIZE_T * pcbScratch ); +// +// This call computes the memory size necessary to create the ECURVE object described by 
pParams, +// including the amount of scratch space needed for the operation. +// +// Returns FALSE if the given parameters are deemed invalid. +// + +PSYMCRYPT_ECURVE +SYMCRYPT_CALL +SymCryptEcurveCreate( + _In_ PSYMCRYPT_ECURVE_PARAMS pParams, + _In_ UINT32 flags, + _Out_writes_bytes_( cbCurve ) PBYTE pbCurve, + SIZE_T cbCurve, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Use caller-allocated memory to create an ECURVE object which +// is defined by the parameters in pParams. +// +// - pParams: parameters that define the curve +// - flags: Not used, must be zero. +// - pbCurve: caller-allocated memory region to hold the curve object +// - cbCurve: size of memory region to hold the curve object +// - pbScratch: caller-allocated memory region used as scratch space to create the curve +// - cbScratch: size of scratch space memory region +// +// Caller should use SymCryptEcurveBufferSizesFromParams to determine the necessary sizes for +// pbCurve and pbScratch. These buffers must be SYMCRYPT_ALIGNed. +// +// Future versions might use the flags to enable different features/tradeoffs. +// There are a number of interesting memory/speed/pre-computation cost trades that can be made. +// For example, pre-computing multiples of the distinguished point, or (parallel?) pre-computation +// of (r, rG) pairs for random r values. +// +// This function applies limited validation of the pParams. The validation is intended to eliminate +// the threat of denial-of-service when hostile parameters are presented. It does not ensure that +// the parameters make sense, define a proper curve, or that any elliptic-curve operations made on +// the curve built from these parameters will fail, succeed or provide any security. +// The only guarantee provided for invalid parameters is that all operations on this curve will +// not crash and will return in some reasonable amount of time.
+// +// Returns NULL if the given memory regions are not large enough or the +// parameters are deemed invalid. If the return value is not NULL, then +// pbCurve buffer must later be wiped with SymCryptWipe(). And as with all +// pbScratch buffers, it is the caller's responsibility to wipe after +// completing all operations that require scratch space. +// + +//======================================================================== +// ECPOINT objects' API is slightly different than the above API schema in the sense that they +// take as input an ECURVE object pointer instead of the number of digits. +// + +PSYMCRYPT_ECPOINT +SYMCRYPT_CALL +SymCryptEcpointAllocate( _In_ PCSYMCRYPT_ECURVE pCurve ); + +VOID +SYMCRYPT_CALL +SymCryptEcpointFree( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_ECPOINT poDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofEcpointFromCurve( PCSYMCRYPT_ECURVE pCurve ); + +PSYMCRYPT_ECPOINT +SYMCRYPT_CALL +SymCryptEcpointCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _In_ PCSYMCRYPT_ECURVE pCurve ); +// The above can take as input a pointer to a curve that has only the FMod, cbModElement, and the +// eformat fields set + +PSYMCRYPT_ECPOINT +SYMCRYPT_CALL +SymCryptEcpointRetrieveHandle( _In_ PBYTE pbBuffer ); + +VOID +SYMCRYPT_CALL +SymCryptEcpointWipe( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_ECPOINT poDst ); + +VOID +SymCryptEcpointCopy( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_ PSYMCRYPT_ECPOINT poDst ); + +VOID +SymCryptEcpointMaskedCopy( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 mask ); + + +//======================================== +// Integer operations +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptIntCopyMixedSize( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = Src, but allows Dst and Src to have different # digits. +// +// Copy the value from piSrc to piDst. 
+// Returns success if Src < R^Dst.nDigits +// If Src >= R^Dst.nDigits then the value in Src is published and an error is returned. +// Warning: it is not side-channel safe to use this function with a Src value that can't fit in Dst. +// Src and Dst may be the same object. +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntBitsizeOfValue( _In_ PCSYMCRYPT_INT piSrc ); +// +// Returns the number of bits necessary to store the value of Src. +// +// Let V be the value of Src. +// Then this function returns +// 0 if Src == 0 +// 1 + floor( log(Src)/log(2) ) if V > 0 +// Note that there is no defined relationship between the result of this function and the bitsize used to allocate Src. +// Digits can be large, so the value Src might be able to store values much larger than 2^b where b is the bitsize +// used when creating Src. +// This function is side-channel safe, and as a result might be slower than expected. +// + + +VOID +SYMCRYPT_CALL +SymCryptIntSetValueUint32( + UINT32 u32Src, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = Src +// This always succeeds as R >= 2^32 on all implementations. +// + +VOID +SYMCRYPT_CALL +SymCryptIntSetValueUint64( + UINT64 u64Src, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = Src +// This always succeeds as R >= 2^64 on all implementations. +// + + +//======================================================================================== +// Read/write INTegers in defined formats +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptIntSetValue( + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + _Out_ PSYMCRYPT_INT piDst ); +// +// Set the value of an INT object from an array of bytes +// +// (pbSrc,cbSrc): buffer that contains the bytes that encode the value in the specified format. +// format: specifies the format of the pbBytes/cbBytes buffer. +// Dst : INT object that receives the value; must previously have been created/allocated. 
+// +// Return value: +// If the value encoded in the (pbSrc,cbSrc) buffer fits in Dst, then the +// function succeeds. If the value does not fit, then the function +// returns an error. Note that the error condition is only dependent on the value in the input, +// and not on how many bytes are in the input. Importing a very large (pbSrc,cbSrc) buffer +// into a small piDst is fine as long as the value fits in the number (i.e. enough of the most significant +// bytes in the buffer are zero). +// +// Warning: +// Error return values are always published, so if this function fails it is visible to the attacker. +// +// Rationale: +// Because the size of a digit can be any size (even odd) there are always scenarios in which the +// caller can provide an input that is too large for the INT to store. (Restricting only the size of +// the input buffer is not sufficient.) And if we have to handle this +// in one case, we might as well handle it in all cases. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptIntGetValue( + _In_ PCSYMCRYPT_INT piSrc, + _Out_writes_bytes_( cbDst) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format ); +// +// Convert a value from the internal number representation to a byte array. +// +// Src is the number whose value is to be stored in a byte array +// (pbDst, cbDst) the destination buffer +// format: the destination format. +// Return value: if the value of Src when encoded in the format fits in the output buffer then the function succeeds. +// If the encoded value does not fit, the function returns an error. (Note: All errors are published.) +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntGetValueLsbits32( _In_ PCSYMCRYPT_INT piSrc ); +// +// Returns Src mod 2^32 +// +// Usecase: there are many number-theoretic algorithms where the algorithm +// depends on (n mod 8) or similar values. 
+// + +UINT64 +SYMCRYPT_CALL +SymCryptIntGetValueLsbits64( _In_ PCSYMCRYPT_INT piSrc ); +// +// Returns Src mod 2^64 +// +// Usecase: RSA public exponents can be 64 bits, and validating that +// a candidate prime is suitable uses this function +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntIsEqualUint32( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ UINT32 u32Src2 ); +// +// Returns a mask value which is 0xffffffff if Src1 = Src2 and 0 otherwise. +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntIsEqual( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2 ); +// +// Returns a mask value which is 0xffffffff if Src1 = Src2 and 0 otherwise. +// +// Note that Src1 and Src2 can be of different sizes. +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntIsLessThan( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2 ); +// +// Returns a mask value which is 0xffffffff if Src1 < Src2 and 0 otherwise. +// +// Note that a <= b is equivalent to NOT( b < a ) so all possible comparisons +// can be made using the < and = comparison primitive. +// + + +//============================================================= +// Addition & subtraction +// For all addition and subtraction operations, the destination may be +// the same object as one of the inputs if the other requirements of the function +// allow that. +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntAddUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 u32Src2, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = Src1 + Src2. +// Requirement: Dst.nDigits == Src1.nDigits +// If the result is larger than the capacity of Dst, then +// Dst is set to the result minus the capacity and the value 1 is returned. +// Otherwise the Dst is set to the sum and the value 0 is returned. +// The return value is thus a carry output of the addition. +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntAddSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = Src1 + Src2. 
+// Requirement: Src1.nDigits == Src2.nDigits == Dst.nDigits +// In more detail: +// if Src1 + Src2 < Dst.capacity: +// Dst = Src1 + Src2 +// return 0 +// else +// Dst = Src1 + Src2 - Dst.capacity +// return 1 +// The return value is a carry output of the addition. +// +// Dst may be the same object as Src1, Src2, or both. +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntAddMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = Src1 + Src2. +// Requirement: Dst.nDigits >= max( Src1.nDigits, Src2.nDigits ) +// In more detail: +// if Src1 + Src2 < Dst.capacity: +// Dst = Src1 + Src2 +// return 0 +// else +// Dst = Src1 + Src2 - Dst.capacity +// return 1 +// The return value is a carry output of the addition. +// +// Dst may be the same object as Src1, Src2, or both. +// + +// +// Subtraction +// Subtraction functions are the equivalent of addition functions. +// The return value is 1 if an underflow occurred (borrow), and 0 if no underflow/borrow occurred. +// On underflow, the value of the result is the result of the subtraction plus Dst.capacity. +// +// Rationale: For an underflow we could also return (UINT32)-1 or return -1 on a INT32. +// -1 in an unsigned type is actually 2^32 -1 which makes no sense. +// Returning a signed type is somewhat neater, but all other values are unsigned, and mixing +// signed and unsigned types is always error-prone. Furthermore, converting from a signed integer +// to a mask is also error-prone (at least within the behaviour guaranteed by the C standard.) +// Returning an unsigned 1 is therefore preferred. 
+// + +UINT32 +SYMCRYPT_CALL +SymCryptIntSubUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 Src2, + _Out_ PSYMCRYPT_INT piDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptIntSubSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptIntSubMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ); + +VOID +SYMCRYPT_CALL +SymCryptIntNeg( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ); + +// +// Dst = (- Src) mod Dst.Capacity; +// Requirement: +// - Dst.nDigits == Src.nDigits; +// This is a negate modulo the capacity. +// Useful when you want the absolute value of a difference. +// Compute the difference, and if the subtraction yields a borrow, negate the result. +// + +//=================================================================== +// Shifts +// Note that the shift amount is always published. +// If the need arises, we can define variants that are side-channel safe +// w.r.t. the shift size, but that incurs a significant performance cost. +// + +VOID +SYMCRYPT_CALL +SymCryptIntMulPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = (Src * 2^Exp ) mod R^n where n = Dst.nDigits. +// Requirement: Dst.nDigits == Src.nDigits, Dst == Src is allowed +// Exp is published. +// +// A variant that keeps Exp private is currently not available, but can be added to the API if needed. +// (A side-channel safe variant might require scratch space.) +// +// Dst may be the same object as Src. +// + +VOID +SYMCRYPT_CALL +SymCryptIntDivPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = (Src div 2^Exp ) +// Requirement: Dst.nDigits == Src.nDigits, Dst == Src is allowed +// Exp is published +// +// A variant that keeps Exp private is currently not available, but can be added to the API if needed. +// (A side-channel safe variant might require scratch space.)
+// +// Dst may be the same object as Src. +// + +VOID +SYMCRYPT_CALL +SymCryptIntShr1( + UINT32 highestBit, + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = (Src + highestBit * Src.Capacity) div 2 +// +// Requirements: +// Src.nDigits == Dst.nDigits +// highestBit <= 1 +// +// This is the Int equivalent of the 'shift right 1' instruction. +// Shifting by one can be implemented faster than variable sized shifts. +// + +VOID +SYMCRYPT_CALL +SymCryptIntModPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = Src mod 2^Exp +// Requirement: Dst.nDigits == Src.nDigits, Dst == Src is allowed +// Exp is published +// +// Dst may be the same object as Src. +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntGetBit( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 iBit ); +// +// Returns the i-th bit (starting from 0 for the LSB) of piSrc. +// Therefore the only possible return values are 0 and 1. +// +// Requirements: +// - iBit < SymCryptIntBitsizeOfObject( piSrc ) +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntGetBits( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 iBit, + UINT32 nBits ); +// +// Returns the bits from position iBit up to (iBit + nBits - 1) +// (starting from 0 for the LSB). Total of nBits. The 0-th bit of +// the return value corresponds to the iBit-th bit of the source. +// +// Requirements: +// - 1 <= nBits <= 32 +// - iBit + nBits <= SymCryptIntBitsizeOfObject( piSrc ) +// +// Remarks: +// - The values iBit and nBits are not protected against side-channel attacks, +// therefore they should be treated as published. +// - The bits of the return value after the (nBits)-th bit are zero. +// + +VOID +SYMCRYPT_CALL +SymCryptIntSetBits( + _In_ PSYMCRYPT_INT piDst, + UINT32 value, + UINT32 iBit, + UINT32 nBits ); +// +// Sets the bits from position iBit up to (iBit + nBits - 1) +// (starting from 0 for the LSB). Total of nBits. The 0-th bit of +// the input value corresponds to the iBit-th bit of the destination.
+// +// Requirements: +// - 1 <= nBits <= 32 +// - iBit + nBits <= SymCryptIntBitsizeOfObject( piDst ) +// +// Remarks: +// - The values iBit and nBits are not protected against side-channel attacks, +// therefore they should be treated as published. +// - The bits of the value after the (nBits)-th bit are ignored. +// + +//=========================================================== +// Mul & div +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntMulUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 Src2, + _Out_ PSYMCRYPT_INT piDst ); +// +// Dst = Src1 * Src2 mod Dst.capacity; return value = Src1 * Src2 div Dst.capacity +// Requirement: piDst.nDigits == piSrc1.nDigits, Dst == Src1 is allowed +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_INT_MUL( _nResultDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_MUL( _nResultDigits ) + +VOID +SYMCRYPT_CALL +SymCryptIntMulSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src1 * Src2. +// Requirement: +// - Src1.nDigits == Src2.nDigits; Dst.nDigits == Src1.nDigits + Src2.nDigits +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_INT_MUL( Dst.nDigits ) +// +// Note that Dst cannot be the same object as Src1 or Src2 because of the size restrictions. +// + +VOID +SYMCRYPT_CALL +SymCryptIntSquare( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src^2 +// Requirement: +// - Dst.nDigits == 2 * Src.nDigits +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_INT_MUL( Dst.nDigits ) +// +// Note that Dst cannot be the same object as Src because of the size restrictions. +// + +VOID +SYMCRYPT_CALL +SymCryptIntMulMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src1 * Src2.
+// Requirement: +// - Dst.nDigits >= Src1.nDigits + Src2.nDigits +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_INT_MUL( Dst.nDigits ) +// +// Note that Dst cannot be the same object as Src1 or Src2 because of the size restrictions. +// + + +// +// Division +// For all division and modulo operations, there are pre-computations that have to be done +// on the divisor. The pre-computed divisor information is stored in a DIVISOR object. +// Note that the bitsize of the value of the divisor is published. +// Therefore, a generic division is not side-channel safe. +// Rationale: Hiding the bitsize of the value of the divisor is quite expensive, +// and we have no cryptographic algorithms that require it. +// + + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptIntFromDivisor( _In_ PSYMCRYPT_DIVISOR pdSrc ); +// +// Returns the INT object inside the DIVISOR object. +// Digit size of the INT object is equal to the digit size of the DIVISOR object. +// This object has two uses: +// - On an uninitialized DIVISOR object it is a suitable place to put a value before calling +// SymCryptIntToDivisor. +// - On an initialized DIVISOR object the function returns a pointer to the INT that contains +// the divisor value. Modifying the INT value from an initialized DIVISOR value corrupts +// the divisor value. +// +// This is typically a very fast function, with a run-time cost that is zero or only one instruction. +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( _nDigits ) + +VOID +SYMCRYPT_CALL +SymCryptIntToDivisor( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_DIVISOR pdDst, + UINT32 totalOperations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Create a DIVISOR object from an INT. 
+// Requirement: +// - Dst.nDigits == Src.nDigits +// - Src != 0 +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( Src.nDigits ) +// SymCryptIntBitsizeOfValue( Src ) is published. +// Src may be equal to SymCryptIntFromDivisor( Dst ). +// totalOperations is an estimate of how many divide/modulo operations will be performed with this divisor. +// An implementation may use this to decide how much pre-computations to do. +// flags: any combination of the following flag values: +// - SYMCRYPT_FLAG_DATA_PUBLIC +// Signals that the Src value is public. +// Implementations can use this to use more efficient divisor algorithms depending on the actual value of Src. +// For example, if Src is very close to a power of 2, division can be implemented more efficiently. +// +// Once a divisor object has been created, it is immutable. +// Multiple threads can use the same divisor object for different division operations in parallel. +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( _nSrcDigits, _nDivisorDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_DIVMOD( _nSrcDigits, _nDivisorDigits ) + +VOID +SYMCRYPT_CALL +SymCryptIntDivMod( + _In_ PCSYMCRYPT_INT piSrc, + _In_ PCSYMCRYPT_DIVISOR pdDivisor, + _Out_opt_ PSYMCRYPT_INT piQuotient, + _Out_opt_ PSYMCRYPT_INT piRemainder, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Quotient = Src div Divisor +// Remainder = Src mod Divisor +// Quotient & Remainder may be NULL in which case that result is not returned. +// Requirements: +// - Quotient.nDigits >= Src.nDigits +// - Remainder.nDigits >= Divisor.nDigits +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( Src.nDigits, Divisor.nDigits ) +// Quotient and Remainder must be different objects. +// Src may be the same object as either Quotient or Remainder. 
+// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_EXTENDED_GCD( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_EXTENDED_GCD( _nDigits ) + +VOID +SYMCRYPT_CALL +SymCryptIntExtendedGcd( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + UINT32 flags, + _Out_opt_ PSYMCRYPT_INT piGcd, + _Out_opt_ PSYMCRYPT_INT piLcm, + _Out_opt_ PSYMCRYPT_INT piInvSrc1ModSrc2, + _Out_opt_ PSYMCRYPT_INT piInvSrc2ModSrc1, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Compute up to four results from Src1 and Src2. +// GCD is the greatest common divisor of Src1 and Src2. +// LCM is the Least Common Multiple of Src1 and Src2. +// InvSrc1ModSrc2 is the smallest value such that (InvSrc1ModSrc2 * Src1) mod Src2= GCD( Src1, Src2 ) +// UNLESS Src1 is a multiple of Src2, i.e. when Src1 = 0 mod Src2. In this case the result is +// undefined. +// InvSrc2ModSrc1 is the smallest value such that (InvSrc2ModSrc1 * Src2) mod Src1= GCD( Src1, Src2 ) +// UNLESS Src2 is a multiple of Src1, i.e. when Src2 = 0 mod Src1. In this case the result is +// undefined. +// +// The last two modular inverse values are not true modular inverses unless GCD( Src1, Src2 ) = 1. +// +// Any of the output pointers can be NULL and then that result is not returned. +// Requirements: +// - Src1 > 0 +// - Src2 > 0 and Src2 odd +// - Gcd.nDigits >= min( Src1.nDigits, Src2.nDigits ) +// - Lcm.nDigits >= Src1.nDigits + Src2.nDigits +// - InvSrc1ModSrc2.nDigits >= max(Src1.nDigits, Src2.nDigits) // Future work: Make these bounds Src2 and Src1 respectively. +// - InvSrc2ModSrc1.nDigits >= max(Src1.nDigits, Src2.nDigits) +// - if piInvSrc2ModSrc1 is not NULL, max( Src1.nDigits, Src2.nDigits ) * 2 <= SymCryptDigitsFromBits(SYMCRYPT_INT_MAX_BITS) +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_EXTENDED_GCD( max( Src1.nDigits, Src2.nDigits ) ) +// +// If only one inverse value is needed, it is most efficient to use only InvSrc1ModSrc2. 
+// +// The restriction that Src2 must be odd can be removed in a future version. +// The SYMCRYPT_FLAG_DATA_PUBLIC flag signals that the inputs are public information and do not have +// to be side-channel protected. +// The SYMCRYPT_FLAG_GCD_INPUTS_NOT_BOTH_EVEN signals that at least one input is odd. This speeds up the +// side-channel safe implementation; this flag is not needed if the inputs are signaled as public as the code can then +// afford to check that condition and change to use an optimized algorithm. +// The SYMCRYPT_FLAG_GCD_PUBLIC signals that the GCD value is public. This can make some computations +// (of the inverses) more efficient when GCD = 1. +// + +#define SYMCRYPT_FLAG_GCD_INPUTS_NOT_BOTH_EVEN (0x02) +#define SYMCRYPT_FLAG_GCD_PUBLIC (0x04) + + +UINT64 +SYMCRYPT_CALL +SymCryptUint64Gcd( UINT64 a, UINT64 b, UINT32 flags ); +// +// Return GCD of two 64-bit integers. +// a, b : inputs to the GCD +// flags: +// - SYMCRYPT_FLAG_DATA_PUBLIC signals that a and b are public values (w.r.t. side-channel safety) +// This may improve performance. +// - SYMCRYPT_FLAG_GCD_INPUTS_NOT_BOTH_EVEN: signals that at least one of (a,b) is odd. This +// simplifies & speeds up the GCD computation. +// +// Note: +// The current implementation requires that the INPUTS_NOT_BOTH_EVEN flag is set (and at least one input be odd). +// Also note that GCD(x, 0) == GCD(0, x) == x +// + + +#define SYMCRYPT_SCRATCH_BYTES_FOR_CRT_GENERATION( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_CRT_GENERATION( _nDigits ) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCrtGenerateInverses( + UINT32 nCoprimes, + _In_reads_( nCoprimes ) PCSYMCRYPT_MODULUS * ppmCoprimes, + UINT32 flags, + _Out_writes_( nCoprimes ) PSYMCRYPT_MODELEMENT * ppeCrtInverses, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Compute the Chinese Remainder Theorem (CRT) constants for a set of nCoprimes +// pairwise coprime moduli.
Pointers to the input numbers are stored in the array of +// pointers ppmCoprimes, while the outputs are stored to the locations pointed by +// ppeCrtInverses. +// +// For input numbers Src1, Src2, ..., SrcK where K = nCoprimes, let N = Src1*Src2*...*SrcK. +// Then this function outputs the constants: +// (( Src1 / N ) mod Src1), (( Src2 / N ) mod Src2), ..., (( SrcK / N ) mod SrcK) +// +// The most common case is for the RSA algorithm where the inputs are 2 prime numbers P and Q +// and only Q^{-1} mod P is needed (i.e. only the first term of the output array). +// +// Any of the output pointers in the ppeCrtInverses can be NULL and then that result +// is not returned (resulting in a faster total running time). +// +// The number of inputs nCoprimes and which outputs are returned is public. +// +// Requirements: +// - nCoprimes >= 2 +// - Both ppmCoprimes and ppeCrtInverses must be arrays of pointers of exactly nCoprimes pointers. +// - ppmCoprimes[i] != NULL for all i in [0, nCoprimes-1]. +// - The input moduli must be pairwise coprime. +// - The number of digits of all input moduli must match the number of digits of the corresponding +// output modelements. +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_CRT_GENERATION( nDigits ) where nDigits is the maximum number +// of digits of the inputs and outputs. 
+// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_CRT_SOLUTION( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_CRT_SOLUTION( _nDigits ) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCrtSolve( + UINT32 nCoprimes, + _In_reads_( nCoprimes ) PCSYMCRYPT_MODULUS * ppmCoprimes, + _In_reads_( nCoprimes ) PCSYMCRYPT_MODELEMENT * ppeCrtInverses, + _In_reads_( nCoprimes ) PCSYMCRYPT_MODELEMENT * ppeCrtRemainders, + UINT32 flags, + _Out_ PSYMCRYPT_INT piSolution, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Solve for x the system of nCoprimes congruences of the form +// x = ppeCrtRemainders[0] (mod ppmCoprimes[0]) +// x = ppeCrtRemainders[1] (mod ppmCoprimes[1]) +// ... +// x = ppeCrtRemainders[nCoprimes-1] (mod ppmCoprimes[nCoprimes-1]) +// +// The input array ppeCrtInverses must have been pre-computed by a call to SymCryptCrtGenerateInverses. +// +// The number of inputs nCoprimes is public. +// +// Requirements: +// - nCoprimes == 2 +// - ppmCoprimes, ppeCrtInverses, and ppeCrtRemainders must be arrays of pointers of exactly nCoprimes elements. All +// of them non-NULL. +// - piSolution must be large enough to hold the result modulo the product of all the coprimes. +// - max( ppmCoprimes[0].nDigits, ppmCoprimes[1].nDigits ) * 2 <= SymCryptDigitsFromBits(SYMCRYPT_INT_MAX_BITS) +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_CRT_SOLUTION( nDigits ) where nDigits is the maximum number +// of digits of the input moduli. +// + + +typedef const struct _SYMCRYPT_TRIALDIVISION_CONTEXT *PCSYMCRYPT_TRIALDIVISION_CONTEXT; + +PCSYMCRYPT_TRIALDIVISION_CONTEXT +SYMCRYPT_CALL +SymCryptCreateTrialDivisionContext( UINT32 nDigits ); +// +// Create a trial division context that can be used for integers up to and including nDigits digits. +// The Trial division context can be used in multiple threads in parallel. +// The context should be freed with SymCryptFreeTrialDivisionContext after use. +// A context can be fairly large (100 kB) so freeing it is important. 
+// Returns NULL if out of memory or an invalid digit count is provided. +// + +VOID +SYMCRYPT_CALL +SymCryptFreeTrialDivisionContext( PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext ); +// +// Free the trial division context after use. +// + +UINT32 +SYMCRYPT_CALL +SymCryptIntFindSmallDivisor( + _In_ PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext, + _In_ PCSYMCRYPT_INT piSrc, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Returns a divisor of piSrc, or zero. +// Requirements: +// Requirement: +// - pContext is a valid trial division context, and Context.nDigits >= Src.nDigits +// - Src >= 2 +// Note: +// - Src is published if this function returns a divisor. +// +// There is no guarantee that this function finds small divisors; +// it is valid for the implementation to always return 0. +// Any nonzero return value is always >= 2 and an actual divisor of Src. +// Note: this function might not find 2 as a small divisor. +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_INT_IS_PRIME( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_IS_PRIME( _nDigits ) + +UINT32 +SYMCRYPT_CALL +SymCryptIntMillerRabinPrimalityTest( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 nBitsSrc, + UINT32 nIterations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Applies the Miller-Rabin prime testing algorithm using nIterations on the integer +// piSrc. +// +// The maximum bitsize of the value of piSrc is equal to nBitsSrc and it is public. +// The number of iterations nIterations is also public. +// +// If the return value is 0, then Src is guaranteed to be a composite value. +// In this case, the value of Src is treated as public. +// +// If the return value is 0xffffffff, then Src might be prime. +// In this case, the value of Src is treated as private except when the +// SYMCRYPT_FLAG_DATA_PUBLIC flag is specified. 
+// +// If the flag SYMCRYPT_FLAG_DATA_PUBLIC is specified the +// algorithm leaks the number of trailing zeros of Src-1. The reason for +// not having a fully side-channel safe implementation for arbitrary +// numbers is that such a function would be prohibitively slow. +// +// Requirements: +// - SymCryptIntBitsizeOfValue( piSrc ) <= nBitsSrc <= SymCryptIntBitsizeOfObject( piSrc ) +// - Src is odd and greater than 3. +// - If flags == 0 then Src must be 3 modulo 4. (See the comment above for +// the SYMCRYPT_FLAG_DATA_PUBLIC flag) +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_INT_IS_PRIME( Src.nDigits ) +// + +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_INT_PRIME_GEN( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_PRIME_GEN( _nDigits ) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptIntGenerateRandomPrime( + _In_ PCSYMCRYPT_INT piLow, + _In_ PCSYMCRYPT_INT piHigh, + _In_reads_opt_( nPubExp ) PCUINT64 pu64PubExp, + UINT32 nPubExp, + UINT32 nTries, + UINT32 flags, + _Inout_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// This function generates a random prime Dst such that +// Dst == 3 mod 4 and +// Low <= Dst < High +// for e in PubExp[]: GCD( Dst - 1, e ) == 1 +// +// (pu64PubExp, nPubExp) can be (NULL, 0) if no pubexp restriction is needed. +// The nTries parameter specifies the maximum number of candidate numbers +// until a prime number is found satisfying the above restrictions. +// If the function cannot find one after nTries, it returns SYMCRYPT_INVALID_ARGUMENT +// (For example, if the caller passes in a Low bound bigger than the High bound, +// or if there are no primes between Low and High). +// +// The values of the pubexps, piLow and piHigh are public. 
+// +// flags: None +// +// Requirements: +// - SymCryptIntBitsizeOfValue( piHigh ) <= SymCryptIntBitsizeOfObject(piDst) +// - piLow > 3 +// - Each public exponent must be greater than 0 +// - 0 <= nPubExp <= SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_INT_PRIME_GEN( Dst.nDigits ) +// + +//===================================================== +// Modular arithmetic +// +// To perform modular arithmetic the modulus has to be prepared into a Modulus object. +// Arithmetic in the ring modulo the modulus can then be done using ModElement objects. +// + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptDivisorFromModulus( _In_ PSYMCRYPT_MODULUS pmSrc ); +// +// Returns the DIVISOR object inside the MODULUS object. +// +// Digit size of the DIVISOR object is equal to the digit size of the MODULUS object. +// This object has one use: +// - On an initialized MODULUS object the function returns a pointer to the DIVISOR that contains +// the modulus value. Modifying the DIVISOR value from an initialized MODULUS value corrupts +// the modulus. +// +// This is typically a very fast function, with a run-time cost that is zero or one instruction. +// + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptIntFromModulus( _In_ PSYMCRYPT_MODULUS pmSrc ); +// +// Returns the INT object inside the MODULUS object. +// +// Digit size of the INT object is equal to the digit size of the MODULUS object. +// This object has two uses: +// - On an uninitialized MODULUS object it is a suitable place to put a value before calling +// SymCryptIntToModulus. +// - On an initialized MODULUS object the function returns a pointer to the INT that contains +// the modulus value. Modifying the INT value from an initialized MODULUS value corrupts +// the modulus. +// +// This is typically a very fast function, with a run-time cost that is zero or one instruction. 
+// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_INT_TO_MODULUS( _nDigits ) + +VOID +SYMCRYPT_CALL +SymCryptIntToModulus( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_MODULUS pmDst, + UINT32 averageOperations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Create a modulus from an INT. +// Requirements: +// - Src != 0 +// - Dst.nDigits == Src.nDigits +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS( Src.nDigits ) +// SymCryptIntBitsizeOfValue( Src ) is published. +// averageOperations is the average number of multiplications that are performed on a ModElement created with this modulus between the time that the value is +// created as a ModElement and the time it is exported out of modElement form. +// There are multiple ways of doing modular computations; some of them are faster but have an overhead for converting into and out of modular form. +// For example, for RSA verification the # operations is small and conversion overhead should be avoided. +// For RSA signatures, the # operations is large and the fastest per-operation form should be used. +// This parameter allows the library to select the right kind of modular arithmetic for this modulus. +// The following flags are supported: +// SYMCRYPT_FLAG_DATA_PUBLIC +// Signals the code that the Src value is public. This may improve performance because it allows further optimizations that +// depend on the value. (For example, if Src is close to a power of 2, the modulo reduction can be made significantly faster.) +// SYMCRYPT_FLAG_MODULUS_PARITY_PUBLIC +// Signals that the parity of Src (whether it is even or odd) may be treated as a public value. +// There are some algorithms that can speed up operations on odd moduli, but their use publishes the fact that the modulus is odd. 
+// SYMCRYPT_FLAG_MODULUS_ADDITIVE_ONLY +// The modulus will only be used for addition and subtraction, not for multiplication or division. +// This can significantly reduce the cost of this function as there is no need to pre-compute the divisor information. +// SYMCRYPT_FLAG_MODULUS_PRIME +// Signals that the modulus is a prime. Some algorithms can be more efficient for prime moduli. Note that setting this flag +// for a non-prime modulus can result in incorrect answers. +// The flags and averageOperations parameters are published. +// + +#define SYMCRYPT_FLAG_MODULUS_PARITY_PUBLIC (0x02) +#define SYMCRYPT_FLAG_MODULUS_ADDITIVE_ONLY (0x04) +#define SYMCRYPT_FLAG_MODULUS_PRIME (0x08) + +#define SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _nDigits ) + + +VOID +SYMCRYPT_CALL +SymCryptIntToModElement( + _In_ PCSYMCRYPT_INT piSrc, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src mod Mod +// Requirements: +// - Dst.nDigits == Mod.nDigits +// - piSrc.nDigits <= 2 * Mod.nDigits +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// +// Note: the input is limited in size to be no more than twice the modulus size (in digits). +// This should be a rare case, and it simplifies the scratch space handling significantly. +// + +VOID +SYMCRYPT_CALL +SymCryptModElementToInt( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src +// +// Requirement: +// - Dst.nDigits >= Mod.nDigits +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// +// Convert a ModElement to an Int. 
+// The internal format in which a ModElement is stored might be different +// from the format of an Int; this function converts the value to the INT format. +// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModElementSetValue( + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = decode( pbSrc, cbSrc, format ) mod Mod +// Requirement: +// - SymCryptDigitsFromBits( 8 * cbSrc ) <= Mod.nDigits +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// +// This is a separate function as it is frequently used, and does not require the allocation of an INT object. +// + +VOID +SYMCRYPT_CALL +SymCryptModElementSetValueUint32( + UINT32 value, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = value mod Mod +// value is published. +// Requirement: +// - value < Mod +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// Note: this function does NOT hide the value. +// Rationale: typically the value parameter is known, either 0 or 1. +// + +VOID +SYMCRYPT_CALL +SymCryptModElementSetValueNegUint32( + UINT32 value, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = -value mod Mod +// value is published. +// Requirement: +// - 0 < value < Mod +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// Note: this function does NOT hide the value. +// Rationale: typically the value parameter is known, either 0 or 1. 
+// + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModElementGetValue( + PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// (pbDst, cbDst) = encode( format, cbDst, Src ) +// Requirement: +// - SymCryptDigitsFromBits( 8 * cbDst ) <= Mod.nDigits +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// +// Retrieve the value of a ModElement as an array of bytes. +// + +UINT32 +SYMCRYPT_CALL +SymCryptModElementIsEqual( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2 ); +// +// Returns a mask value which is 0xffffffff if Src1 = Src2 and 0 otherwise. +// +// Both SYMCRYPT_MODELEMENTs should have been created using the modulus pmMod. Otherwise +// the result is undefined. +// + +UINT32 +SYMCRYPT_CALL +SymCryptModElementIsZero( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc ); +// +// Returns a mask value which is 0xffffffff if Src = 0 and 0 otherwise. +// +// Useful for quickly checking if a ModElement is 0. +// + + +//=============================== +// Modular arithmetic. +// + +VOID +SYMCRYPT_CALL +SymCryptModSetRandom( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = random value modulo Mod. +// Requirement: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// +// Random value is chosen uniformly from the set of allowed values. +// By default this function does not return the values 0, 1, or -1 (see the NOTE below for the small-moduli exception). +// Flags parameter can signal that these special values are allowed. +// flags parameter is published.
+// +// Rationale: these values cause problems in many situations, and for all commonly used cryptographic modulo sizes +// the absence of these values is statistically undetectable even if they are allowed. +// For completeness of the API, the flags parameter can be used to allow these three values. +// flags is a bitmask containing a combination of the following bit values: +// SYMCRYPT_FLAG_MODRANDOM_ALLOW_ZERO +// SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE +// SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE +// Specifying ALLOW_ZERO implies ALLOW_ONE, there is no way to allow 0 and disallow 1. +// +// NOTE: +// For very small moduli (1, 2, and 3), not allowing 0, 1, or -1 by default does not make sense because this would +// exclude all possible values! Instead the default behavior is to allow -1 for these moduli. +// Modulo 1 => return 0 by default +// Modulo 2 => return 1 by default +// may also return 0 if SYMCRYPT_FLAG_MODRANDOM_ALLOW_ZERO is specified +// Modulo 3 => return 2 by default +// may also return 1 if SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE is specified, and +// may also return 0 or 1 if SYMCRYPT_FLAG_MODRANDOM_ALLOW_ZERO is specified, +// +// Callers relying on not having 0, 1, or -1 are required to pass a larger modulus. 
+ +#define SYMCRYPT_FLAG_MODRANDOM_ALLOW_ZERO (0x01) +#define SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE (0x02) +#define SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE (0x04) + + +VOID +SYMCRYPT_CALL +SymCryptModNeg( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = -Src mod Mod +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// + +VOID +SYMCRYPT_CALL +SymCryptModAdd( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src1 + Src2 mod Mod +// Requirement: +// - Src1.modulus == Src2.modulus == Mod. +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// Dst == Src1, Dst == Src2, and Src1 == Src2 are all allowed. +// Rationale: +// scratch space can make the mod-add faster for side-channel safe implementations. +// It allows: +// Dst = Src1 + Src2; +// Tmp = Dst - Mod; +// Dst = choose( Dst, Tmp, carry_bits ) +// And the choose() operation is fast because it does not require carry propagation. +// + + +VOID +SYMCRYPT_CALL +SymCryptModSub( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src1 - Src2 mod Mod +// Requirement: +// Same as SymCryptModAdd +// + + +VOID +SYMCRYPT_CALL +SymCryptModMul( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src1 * Src2 mod Mod +// Requirement: +// - Src1.modulus == Src2.modulus == Mod. 
+// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// + +VOID +SYMCRYPT_CALL +SymCryptModSquare( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src^2 mod Mod +// Requirement: +// - Src.modulus == Mod. +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// + +VOID +SYMCRYPT_CALL +SymCryptModDivPow2( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Src / 2^exp mod Mod +// Requirements: +// - Mod is odd. +// - Src.modulus == Dst.modulus == Mod. +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( Mod.nDigits ) +// +// Remarks: +// - The value exp is *** public ***; hence it should be treated as known to the attacker. +// - This function may write intermediate values to peDst and read them back, violating the +// read-once/write-once rule, so the caller must ensure that the peDst buffer is trusted. +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_MODINV( _nDigits ) + +// SYMCRYPT_FLAG_DATA_PUBLIC signals that the Src element is public and does not have to be protected +// against side-channel attacks. The public-ness of the Modulus is part of the Modulus object, specified when the +// modulus value was set. +// Marking the source value as public has very little effect on performance, but it removes the random blinding used. +// The main goal of this flag is to allow ECDSA verification without a source of random numbers.
+ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModInv( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = 1/Src mod Mod. +// +// - pmMod: Modulus, must have the SYMCRYPT_FLAG_MODULUS_PRIME and SYMCRYPT_FLAG_DATA_PUBLIC flag set. +// Non-prime or non-public moduli are currently not supported. +// - peSrc: Source value, modulo pmMod +// - peDst: Destination value, mod element modulo pmMod +// - flags: SYMCRYPT_FLAG_DATA_PUBLIC signals that peSrc is a public value. +// - pbScratch/cbScratch: scratch space >= SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( nDigits( pmMod ) ) +// +// Returns an error if +// - GCD( Src, Mod ) != 1 +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( _nDigits ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_MODEXP( _nDigits ) + +VOID +SYMCRYPT_CALL +SymCryptModExp( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peBase, + _In_ PCSYMCRYPT_INT piExp, + UINT32 nBitsExp, + UINT32 flags, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = Base ^ Exp mod Mod +// where only the least significant (nBitsExp) bits of the exponent are used. +// +// Requirements: +// - nBitsExp != 0 +// - Mod > 1 +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( Mod.nDigits ) +// +// Allowed flags: +// SYMCRYPT_FLAG_DATA_PUBLIC: If set then the algorithm +// is not side-channel safe (For use in RSA encryption - exponentiation +// with a public exponent). The default behaviour is side channel safety. +// +// Remarks: +// - The undefined operation 0^0 will return 1. +// - The value nBitsExp is *** public ***; hence it should be treated as known to the attacker. +// Examples: +// - nBitsExp = SymCryptIntBitsizeOfObject( piExp ) => This processes all the +// bits of the exponent object. 
+// - nBitsExp = number of bits of modulus ==> This processes the same +// number of bits (even leading zeros) as the modulus. In this case +// the exponent should have a value with bitsize less or equal to the +// bitsize of the modulus. +// - nBitsExp = max(1, SymCryptIntBitsizeOfValue( piExp )) => This processes +// the bits of the exponent ignoring the leading zeros. Therefore, this +// option leaks the bitsize of the value of the exponent. +// + +// SYMCRYPT_MODMULTIEXP_MAX_NBASES, _NBITSEXP: The maximum number of bases +// and exponent bits allowed for the multi-exponentiation operation. +#define SYMCRYPT_MODMULTIEXP_MAX_NBASES (8) +#define SYMCRYPT_MODMULTIEXP_MAX_NBITSEXP (SYMCRYPT_INT_MAX_BITS) + +#define SYMCRYPT_SCRATCH_BYTES_FOR_MODMULTIEXP( _nDigits, _nBases, _nBitsExp ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_MODMULTIEXP( _nDigits, _nBases, _nBitsExp ) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModMultiExp( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_reads_( nBases ) PCSYMCRYPT_MODELEMENT * peBaseArray, + _In_reads_( nBases ) PCSYMCRYPT_INT * piExpArray, + UINT32 nBases, + UINT32 nBitsExp, + UINT32 flags, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Dst = ( peBaseArray[0]^piExpArray[0] * peBaseArray[1]^piExpArray[1] * ... * +// peBaseArray[nBases-1]^piExpArray[nBases-1] ) mod Mod +// where only the least significant (nBitsExp) bits of the exponents are used. +// +// Requirements: +// - 1<= nBitsExp <= SYMCRYPT_MODMULTIEXP_MAX_NBITSEXP +// - Mod > 1 +// - 1<= nBases <= SYMCRYPT_MODMULTIEXP_MAX_NBASES +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_MODMULTIEXP( Mod.nDigits, nBases, nBitsExp ) +// +// Allowed flags: +// SYMCRYPT_FLAG_DATA_PUBLIC: If set then the algorithm +// is not side-channel safe (For use in DSA verification). +// The default behaviour is side channel safety. 
+// + +// ========================================= +// Tools for side-channel safety +// ======================================== + +// +// Side-channel safe lookup table +// + +typedef struct _SYMCRYPT_SCSTABLE { + UINT32 groupSize; // nElements must be a multiple of this value + UINT32 interleaveSize; // elementSize must be a multiple of this value + UINT32 nElements; // must be multiple of groupSize + UINT32 elementSize; // # bytes in each element, note: limited to UINT32 for efficiency + PBYTE pbTableData; // table storage; presumably the caller-provided buffer set by SymCryptScsTableSetBuffer (TODO confirm) + UINT32 cbTableData; // presumably the size in bytes of pbTableData (TODO confirm) +} SYMCRYPT_SCSTABLE, *PSYMCRYPT_SCSTABLE; + +UINT32 +SYMCRYPT_CALL +SymCryptScsTableInit( + _Out_ PSYMCRYPT_SCSTABLE pScsTable, + UINT32 nElements, + UINT32 elementSize ); +// Initializes an ScsTable for nElements elements each of elementSize bytes. +// nElements and elementSize are limited to less than 2^16. +// Return value is the size of the buffer that the caller needs to provide. +// +// Requirements: +// - nElements must be a multiple of groupSize and elementSize must be a +// multiple of interleaveSize. Currently all implementations have as +// defaults +// groupSize = 4 +// interleaveSize = 8 +// + +VOID +SYMCRYPT_CALL +SymCryptScsTableSetBuffer( + _Inout_ PSYMCRYPT_SCSTABLE pScsTable, + _Inout_updates_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer ); +// Sets the caller-provided buffer on the ScsTable. +// cbBuffer should be >= the size returned by the SymCryptScsTableInit function + +VOID +SYMCRYPT_CALL +SymCryptScsTableStore( + _Inout_ PSYMCRYPT_SCSTABLE pScsTable, + UINT32 iIndex, + _In_reads_bytes_( cbData ) PCBYTE pbData, + UINT32 cbData ); +// Not side-channel safe; publishes iIndex. +// cbData must match the elementSize i.e. the size of a single element. + +VOID +SYMCRYPT_CALL +SymCryptScsTableLoad( + _In_ PSYMCRYPT_SCSTABLE pScsTable, + UINT32 iIndex, + _Out_writes_bytes_(cbData) PBYTE pbData, + UINT32 cbData ); +// Side-channel safe fetching of data; iIndex is kept secret. +// cbData must match the elementSize i.e. the size of a single element.
+ +VOID +SYMCRYPT_CALL +SymCryptScsTableWipe( + _Inout_ PSYMCRYPT_SCSTABLE pScsTable ); +// Wipes the part of the buffer that the table used + +// Other Side-channel safety tools + +VOID +SYMCRYPT_CALL +SymCryptScsRotateBuffer( + _Inout_updates_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + SIZE_T lshift ); +// Rotates buffer left by lshift without revealing lshift +// through side channels. +// - pbBuffer/cbBuffer: buffer to rotate +// pbBuffer must be aligned to the native integer of the platform (4 or 8 bytes) +// cbBuffer must be a power of two >= 32 +// - lshift: # bytes to left rotate the buffer +// pbBuffer[0] will get the value pbBuffer[ lshift % cbBuffer ] + +VOID +SYMCRYPT_CALL +SymCryptScsCopy( + _In_reads_( cbDst ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); +// Copy cbSrc bytes of pbSrc into pbDst without revealing cbSrc +// through side channels. +// +// WARNING: pbSrc buffer must be at least cbDst bytes long; not cbSrc! +// +// - pbSrc pointer to buffer to copy data from +// This buffer must be at least cbDst bytes long +// - cbSrc number of bytes to be copied, must be <= 2^31 +// - pbDst destination buffer +// - cbDst size of the destination buffer, must be <= 2^31 +// Equivalent to: +// n = min( cbSrc, cbDst ) +// pbDst[ 0 .. n-1 ] = pbSrc[ 0 .. n-1 ] +// cbSrc is protected from side-channels; cbDst is public. + + +// +// Mask generation functions. +// All these functions are side-channel safe in all parameters. +// Naming convention: +// SymCrypt <MaskType> <Op> <ParameterType> +// <MaskType> is the type of the function result: +// Mask32 UINT32 mask that is 0 or -1 +// Mask64 UINT64 mask that is 0 or -1 +// <Op> is the boolean operation performed on the parameters +// IsZero v == 0 +// IsNonzero v != 0 +// Eq a == b +// Neq a != b +// <ParameterType> is an indication of the parameter type supported. +// U31 UINT32 which is limited to values < 2^31 +// This allows more efficient masking functions.
+// U32 UINT32 +// Other mask types, operations, and parameter types may be defined in future. +// + +UINT32 +SYMCRYPT_CALL +SymCryptMask32IsZeroU31( UINT32 v ); + +UINT32 +SYMCRYPT_CALL +SymCryptMask32IsNonzeroU31( UINT32 v ); + + +UINT32 +SYMCRYPT_CALL +SymCryptMask32EqU32( UINT32 a, UINT32 b ); + +UINT32 +SYMCRYPT_CALL +SymCryptMask32NeqU31( UINT32 a, UINT32 b ); + +UINT32 +SYMCRYPT_CALL +SymCryptMask32LtU31( UINT32 a, UINT32 b ); + + +// +// Other helper functions +// +SIZE_T +SYMCRYPT_CALL +SymCryptRoundUpPow2Sizet( SIZE_T v ); +// Round up to the next power of 2 +// +// Requirements: +// v <= (SIZE_T_MAX / 2) + 1 +// i.e. rounding v up to the next power of 2 fits within SIZE_T, so v is +// less than or equal to the maximum power of 2 representable in SIZE_T + + +//===================================================== +//===================================================== +// RSA padding operations +//===================================================== +//===================================================== + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1ApplyEncryptionPadding( + _In_reads_bytes_( cbPlaintext ) PCBYTE pbPlaintext, + SIZE_T cbPlaintext, + _Out_writes_bytes_( cbPkcs1Format ) PBYTE pbPkcs1Format, + SIZE_T cbPkcs1Format ); +// +// Applies the RSA PKCS1 v1.5 encryption padding to the plaintext buffer. +// - Plaintext buffer containing plaintext to be encoded +// - Pkcs1Format Output buffer, typically the size of the RSA modulus +// Requirement: cbPkcs1Format >= cbPlaintext + 11 due to the PKCS1 overhead. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1RemoveEncryptionPadding( + _Inout_updates_bytes_( cbPkcs1Buffer ) PBYTE pbPkcs1Format, + SIZE_T cbPkcs1Format, + SIZE_T cbPkcs1Buffer, + _Out_writes_bytes_opt_( cbPlaintext ) PBYTE pbPlaintext, + SIZE_T cbPlaintext, + _Out_ SIZE_T *pcbPlaintext ); +// +// Remove the PKCS1 encryption padding and extract the message plaintext. +// This function is side-channel safe w.r.t. 
the data in the Pkcs1Format buffer. +// - pbPkcs1Format points to a buffer containing the raw RSA decrypted data. +// This buffer will be modified by this function. +// - cbPkcs1Format is the # bytes of the buffer that were decrypted with raw RSA +// - cbPkcs1Buffer is the size of the buffer that pbPkcs1Format points to +// cbPkcs1Buffer must be a power of 2 and >= cbPkcs1Format and >= 32 +// cbPkcs1Buffer must be <= 2^30 +// - pbPlaintext/cbPlaintext is the output buffer that will receive the data. +// if pbPlaintext == NULL no message is output, but *pcbPlaintext is still set. +// - pcbPlaintext receives the # bytes in the actual decrypted message. +// set to 0 if an error occurred. +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_RSA_OAEP( _hashAlgorithm, _nBytesOAEP ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_RSA_OAEP( _hashAlgorithm, _nBytesOAEP ) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaOaepApplyEncryptionPadding( + _In_reads_bytes_( cbPlaintext ) PCBYTE pbPlaintext, + SIZE_T cbPlaintext, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_reads_bytes_( cbLabel ) PCBYTE pbLabel, + SIZE_T cbLabel, + _In_reads_bytes_opt_( cbSeed ) PCBYTE pbSeed, + SIZE_T cbSeed, + _Out_writes_bytes_( cbOaepFormat ) PBYTE pbOaepFormat, + SIZE_T cbOaepFormat, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Apply the RSA OAEP encryption padding to the plaintext buffer. +// - Plaintext Plaintext to be encoded +// - hashAlgorithm Hash algorithm to use during padding +// - Label Label input for OAEP +// - Seed Specified seed value. 0 <= cbSeed < hash size +// - OaepFormat Output buffer, typically the size of the RSA modulus +// +// Remarks: +// - If pbSeed == NULL and cbSeed != 0, then the function picks +// a uniformly random seed of size cbSeed bytes. 
+// +// Requirements: +// cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_RSA_OAEP( hashAlgorithm, cbOaepFormat ) +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaOaepRemoveEncryptionPadding( + _In_reads_bytes_( cbOAEPFormat ) + PCBYTE pbOAEPFormat, + SIZE_T cbOAEPFormat, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_reads_bytes_( cbLabel ) PCBYTE pbLabel, + SIZE_T cbLabel, + UINT32 flags, + _Out_writes_bytes_( cbPlaintext ) + PBYTE pbPlaintext, + SIZE_T cbPlaintext, + _Out_ SIZE_T *pcbPlaintext, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Removes the RSA OAEP encryption padding from the OAEP formatted buffer +// after it checks the validity of the format. +// +// *pcbPlaintext is the number of bytes output. If pbPlaintext == NULL then this +// is the only output value. +// +// Allowed flags: +// None +// +// Requirements: +// cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_RSA_OAEP( hashAlgorithm, cbOAEPFormat ) +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PKCS1( _nBytesPKCS1 ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_RSA_PKCS1( _nBytesPKCS1 ) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1ApplySignaturePadding( + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_reads_bytes_( cbHashOid ) + PCBYTE pbHashOid, + SIZE_T cbHashOid, + UINT32 flags, + _Out_writes_bytes_( cbPKCS1Format ) + PBYTE pbPKCS1Format, + SIZE_T cbPKCS1Format ); +// +// Applies the RSA PKCS1 v1.5 signature padding to the source buffer, which typically contains the +// hash of the message.
+// +// Allowed flags: +// SYMCRYPT_FLAG_RSA_PKCS1_NO_ASN1 +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1VerifySignaturePadding( + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_reads_( nOIDCount ) PCSYMCRYPT_OID pHashOIDs, + _In_ SIZE_T nOIDCount, + _In_reads_bytes_( cbPKCS1Format ) + PCBYTE pbPKCS1Format, + SIZE_T cbPKCS1Format, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Verifies that the RSA PKCS1 v1.5 signature padding is valid. +// +// It returns SYMCRYPT_NO_ERROR if the verification succeeded or SYMCRYPT_VERIFICATION_FAIL +// if it failed. +// +// Allowed flags: +// SYMCRYPT_FLAG_RSA_PKCS1_OPTIONAL_HASH_OID +// +// Requirements: +// cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PKCS1( cbPKCS1Format ) +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PSS( _hashAlgorithm, _nBytesMessage, _nBytesPSS ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_RSA_PSS( _hashAlgorithm, _nBytesMessage, _nBytesPSS ) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPssApplySignaturePadding( + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_reads_bytes_opt_( cbSalt ) + PCBYTE pbSalt, + _In_range_(0, cbPSSFormat) SIZE_T cbSalt, + UINT32 nBitsOfModulus, + UINT32 flags, + _Out_writes_bytes_( cbPSSFormat ) + PBYTE pbPSSFormat, + SIZE_T cbPSSFormat, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Applies the RSA PSS signature padding to the source buffer, which typically contains the +// hash of the message. +// +// Remarks: +// - If pbSalt == NULL and cbSalt != 0, then the function picks +// a uniformly random salt of size cbSalt bytes. 
+// +// Allowed flags: +// None +// +// Requirements: +// cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PSS( hashAlgorithm, cbHash, cbPSSFormat ) +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPssVerifySignaturePadding( + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_range_(0, cbPSSFormat) SIZE_T cbSalt, + _In_reads_bytes_( cbPSSFormat ) + PCBYTE pbPSSFormat, + SIZE_T cbPSSFormat, + UINT32 nBitsOfModulus, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Verifies that the RSA PSS signature padding is valid. +// +// It returns SYMCRYPT_NO_ERROR if the verification succeeded or SYMCRYPT_VERIFICATION_FAIL +// if it failed. +// +// Allowed flags: +// SYMCRYPT_FLAG_RSA_PSS_VERIFY_WITH_MINIMUM_SALT +// +// When the flag is set, this function will do signature verification using the cbSalt parameter as +// a minimum value for the salt length, rather than using it as an exact value. Specifying this and +// setting cbSalt = 0 allows callers to verify a signature which has a valid encoding with any salt +// length using a single call. +// +// +// Requirements: +// cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PSS( hashAlgorithm, cbHash, cbPSSFormat ) +// + +//===================================================== +//===================================================== +// EC point operations +//===================================================== +//===================================================== + +PCSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptEcurveGroupOrder( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns a pointer to the group order of the curve's subgroup. +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveDigitsofScalarMultiplier( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the number of digits of a scalar that is big enough to +// store a multiplier of an elliptic curve point. +// See also, SymCryptEcurveSizeofScalarMultiplier. 
+// + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveDigitsofFieldElement( _In_ PCSYMCRYPT_ECURVE pCurve ); +// +// This function returns the number of digits for one coordinate of the public key. +// + +//===================================================== +// GETSET_VALUE_ECURVE_OPERATIONS +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( _pCurve ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( _pCurve ) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointSetValue( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT nformat, + SYMCRYPT_ECPOINT_FORMAT eformat, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Set the value of an ECPOINT object from a source buffer pbSrc of size cbSrc. The buffer +// will contain the necessary coordinates of the ECPOINT in the format specified by nformat +// and eformat. The nformat determines the format of the integers in the buffer while the +// eformat determines the layout (and the number) of the coordinates. +// +// Requirements: +// - cbSrc = X * SymCryptEcurveSizeofFieldElement( pCurve ) where X depends on the +// eformat specified and denotes the number of coordinates. For example, for +// SYMCRYPT_ECPOINT_FORMAT_XY it is equal to 2. +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) +// +// Flag values: +// SYMCRYPT_FLAG_DATA_PUBLIC data is public (no side-channel protection needed) +// +// Rationale: +// Scratch space provides room for conversion of point representations. +// +// Example: +// Set an ECPOINT to (X,Y) point in affine coordinates where the size of each coordinate +// is t = SymCryptEcurveSizeofFieldElement( pCurve ) bytes. The coordinates are +// X=(X_(t-1), ... , X_1, X_0) and Y=(Y_(t-1), ... , Y_1, Y_0) with t-1 the +// most significant byte. 
Then the function can be called with +// pbSrc = { X_(t-1), ... , X_1, X_0, Y_(t-1), ... , Y_1, Y_0 } +// cbSrc = 2 * t +// nformat = SYMCRYPT_NUMBER_FORMAT_MSB_FIRST +// eformat = SYMCRYPT_ECPOINT_FORMAT_XY +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointGetValue( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + SYMCRYPT_NUMBER_FORMAT nformat, + SYMCRYPT_ECPOINT_FORMAT eformat, + _Out_writes_bytes_(cbDst) PBYTE pbDst, + SIZE_T cbDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Retrieve the value of an ECPOINT object into a destination buffer pbDst of size cbDst. The buffer +// will contain the necessary coordinates of the ECPOINT in the format specified by nformat +// and eformat. The nformat determines the format of the integers in the buffer while the +// eformat determines the layout (and the number) of the coordinates. +// +// Flag values: +// SYMCRYPT_FLAG_DATA_PUBLIC data is public (no side-channel protection needed) +// +// Remarks: +// - If the source point is the "zero" point and it cannot be exported into the +// required ECPOINT_FORMAT (XY or X), the function fails with SYMCRYPT_INCOMPATIBLE_FORMAT. +// +// Requirements: +// - cbDst = X * SymCryptEcurveSizeofFieldElement( pCurve ) where X depends on the +// eformat specified and denotes the number of coordinates. For example for SYMCRYPT_ECPOINT_FORMAT_XY it is equal to 2. +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) +// +// Rationale: +// Scratch space provides room for conversion of point representations. +// + +// +// Low-level flags for ECC operations +// +// SYMCRYPT_FLAG_DATA_PUBLIC: When set, the operation will not be side-channel safe. +// It is used to speed up operation on public data. (default: side-channel safe) +// +// SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL: When set, the underlying operation will multiply +// by the cofactor of the curve. 
(default: no multiplication by the cofactor) +// Remark: **Notice that the default behaviour is the opposite of the higher-level +// functions in symcrypt.h.** +#define SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL (0x20) + +//===================================================== +// COMMON_ECURVE_OPERATIONS +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( _pCurve ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( _pCurve ) + +VOID +SYMCRYPT_CALL +SymCryptEcpointSetZero( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Set the destination point poDst to the zero +// element of the additive group defined by the +// elliptic curve addition rule. +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ). +// + +VOID +SYMCRYPT_CALL +SymCryptEcpointSetDistinguishedPoint( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Set the destination point poDst to the +// distinguished point of the curve. +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ). +// + +#define SYMCRYPT_FLAG_ECPOINT_EQUAL (0x01) +#define SYMCRYPT_FLAG_ECPOINT_NEG_EQUAL (0x02) + +UINT32 +SYMCRYPT_CALL +SymCryptEcpointIsEqual( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + UINT32 flags, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// If the flags argument is equal to 0 (default) or SYMCRYPT_FLAG_ECPOINT_EQUAL, it returns a mask value which is +// 0xffffffff if poSrc1 = poSrc2 and 0 otherwise. +// If the flags argument is equal to SYMCRYPT_FLAG_ECPOINT_NEG_EQUAL, it returns a mask value which is +// 0xffffffff if poSrc1 = -poSrc2 and 0 otherwise. 
+// If the flags argument is equal to SYMCRYPT_FLAG_ECPOINT_EQUAL | SYMCRYPT_FLAG_ECPOINT_NEG_EQUAL, +// it returns a mask value which is 0xffffffff if (poSrc1 = poSrc2) or (poSrc1 = -poSrc2) and 0 otherwise. +// +// The points should have been created with the same curve pCurve. Otherwise the result is undefined. +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ). +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcpointIsZero( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Returns a mask value which is 0xffffffff if the point is the zero point of the group. +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ). +// + +UINT32 +SYMCRYPT_CALL +SymCryptEcpointOnCurve( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Returns a mask value which is 0xffffffff if the point is on curve. +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ). +// + +VOID +SYMCRYPT_CALL +SymCryptEcpointAdd( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 flags, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Point addition over the curve pCurve. +// poDst = poSrc1 + poSrc2 +// +// Allowed flags: +// SYMCRYPT_FLAG_DATA_PUBLIC: If set then the algorithm +// is not side-channel safe (and faster). The default behaviour +// is side-channel safety. +// +// Remarks: +// - Complete (i.e. works for all points) +// - Writes intermediate results to poDst breaking the read-once/write-once rule +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ). 
+// + +VOID +SYMCRYPT_CALL +SymCryptEcpointAddDiffNonZero( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Point addition when *poSrc1 != +- *poSrc2 +// and none of them is equal to the zero point. +// +// Remarks: +// - Side-channel safe +// - Complete (i.e. works for all points) +// - Writes intermediate results to poDst breaking the read-once/write-once rule +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ). +// + +VOID +SYMCRYPT_CALL +SymCryptEcpointDouble( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 flags, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Point doubling. +// +// Allowed flags: +// SYMCRYPT_FLAG_DATA_PUBLIC: If set then the algorithm +// is not side-channel safe (and faster). The default behaviour +// is side-channel safety. +// +// Remarks: +// - Side-channel safe +// - Writes intermediate results to poDst breaking the read-once/write-once rule +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ). +// + +VOID +SYMCRYPT_CALL +SymCryptEcpointNegate( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Inout_ PSYMCRYPT_ECPOINT poSrc, + UINT32 mask, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Negates (in place) the source point poSrc if mask == 0xffffffff. +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ).
+// + +//===================================================== +// SCALAR_ECURVE_OPERATIONS +// + +#define SYMCRYPT_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( _pCurve ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( (_pCurve), 1 ) +#define SYMCRYPT_SCRATCH_BYTES_FOR_MULTI_SCALAR_ECURVE_OPERATIONS( _pCurve, _nPoints ) SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( (_pCurve), (_nPoints) ) + +VOID +SYMCRYPT_CALL +SymCryptEcpointSetRandom( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_INT piScalar, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Set the destination point poDst to a random non-zero point +// of the subgroup generated by the distinguished point. +// The function outputs the integer k and the point kG +// where k is picked uniformly at random from the set +// [1, SubgroupOrder-1] ( 0 is excluded). +// +// Requirements: +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( pCurve ). +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointScalarMul( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_INT piScalar, + _In_opt_ + PCSYMCRYPT_ECPOINT poSrc, + UINT32 flags, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// Multiplication of point by scalar. +// poDst = piScalar x poSrc +// +// If poSrc == NULL the algorithm uses the distinguished point of the curve as source +// point and it might be faster (depending on the curve optimizations). +// +// Allowed flags: +// SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL: If set then +// the scalar is multiplied by the cofactor of +// the curve. The default behaviour is to not multiply +// by the cofactor. +// +// Remarks: +// - Complete +// - Side-channel safe +// +// Requirements: +// - The piScalar must have SymCryptEcurveDigitsofScalarMultiplier( pCurve ) digits. 
+// - For Non-Montgomery curves, the piScalar must be in the range [0, SubgroupOrder]. +// - This is the caller's responsibility, it is not checked. +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( pCurve ). +// + +// SYMCRYPT_ECURVE_MULTI_SCALAR_MUL_MAX_NPOINTS: The maximum number of points allowed for the +// multi-scalar multiplication operation. +#define SYMCRYPT_ECURVE_MULTI_SCALAR_MUL_MAX_NPOINTS (2) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointMultiScalarMul( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_INT * piSrcScalarArray, + _In_ PCSYMCRYPT_ECPOINT * poSrcEcpointArray, + UINT32 nPoints, + UINT32 flags, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ); +// +// It executes the multi scalar - add operation for nPoints +// pairs of (exponents, points) in (piSrcScalarArray, poSrcEcpointArray). +// +// If poSrcEcpointArray[0] == NULL the algorithm uses the distinguished point of the curve as +// the first source point and it might be faster (depending on the curve optimizations). +// Only the first source point can be NULL. +// +// Allowed flags: +// SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL: If set then +// the scalar is multiplied by the cofactor of +// the curve. The default behaviour is to not multiply +// by the cofactor. +// SYMCRYPT_FLAG_DATA_PUBLIC: If set then the algorithm +// is not side-channel safe (For use in the ECDSA +// verification with public information). The default behaviour +// is side channel safe. +// +// Requirements: +// - 1<= nPoints <= SYMCRYPT_ECURVE_MULTI_SCALAR_MUL_MAX_NPOINTS +// - Each piScalar must have SymCryptEcurveDigitsofScalarMultiplier( pCurve ) digits. +// - cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_MULTI_SCALAR_ECURVE_OPERATIONS( pCurve, nPoints ). 
+// + + +//////////////////////////////////////////////////////////////////////////// +// AES-CTR-DRBG +// + +#define SYMCRYPT_RNG_AES_INTERNAL_SEED_SIZE (32 + 16) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesGenerateSmall( + _Inout_ PSYMCRYPT_RNG_AES_STATE pRngState, + _Out_writes_( cbRandom ) PBYTE pbRandom, + SIZE_T cbRandom, + _In_reads_opt_( cbAdditionalInput ) PCBYTE pbAdditionalInput, + SIZE_T cbAdditionalInput ); +// +// Generate random output from the state per SP 800-90. +// Callers should almost always use SymCryptRngAesGenerate from symcrypt.h instead. +// +// This is the core generation function that produces up to 64 kB at a time +// This function returns an error code so that we can test the +// error handling of having done more than 2^48 requests between reseeds, +// as required by SP 800-90. +// This is also the Generate function of our SP800-90 compliant implementation. +// If pRngState->fips140-2Check is true, this function runs the continuous self test +// required by FIPS 140-2 (but not by FIPS 140-3 as far as we know). +// pbAdditionalInput is optional. +// + +//===================================================== +// ECDSA-EX +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDsaSignEx( + _In_ PCSYMCRYPT_ECKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_opt_ PCSYMCRYPT_INT piK, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ); +// +// This algorithm is the same as SymCryptEcDsaSign except that the caller can specify +// a value of k in piK. It is used in verifying test vectors of ECDSA. +// +// Requirements: +// - If piK is not NULL it must have SymCryptEcurveDigitsofScalarMultiplier( pCurve ) digits, and +// must be in range [1, SubgroupOrder-1]. +// - If piK is not NULL and the generated signature would be 0, SYMCRYPT_INVALID_ARGUMENT is +// returned. 
+// +// Allowed flags: +// SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION: If set then the hash value will +// not be truncated. +// +// SYMCRYPT_FLAG_DATA_PUBLIC: If specified, all inputs, including the private key, are +// considered as public information and are not protected against side channel attacks. +// This should only be used when signing with a publicly known private key (i.e. in the ECDSA self-test) +// + +//===================================================== +// ML-KEM-EX +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemEncapsulateEx( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _In_reads_bytes_( cbRandom ) PCBYTE pbRandom, + SIZE_T cbRandom, + _Out_writes_bytes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret, + _Out_writes_bytes_( cbCiphertext ) PBYTE pbCiphertext, + SIZE_T cbCiphertext ); +// +// Performs the Encapsulate operation of ML-KEM using caller-provided random input. +// It is used in verifying test vectors of ML-KEM. +// +// This uses the public information of an ML-KEM keypair to generate an agreed secret +// and a ciphertext. Only a peer with the private information of an ML-KEM keypair can +// decapsulate the ciphertext to compute the agreed secret. +// +// The arguments are the following: +// - pkMlKemkey: a key which contains public information required for encapsulation. +// - (pbRandom, cbRandom): a buffer containing the input random. +// Currently cbRandom must be 32 for all parameterizations of ML-KEM. +// - (pbAgreedSecret, cbAgreedSecret): a buffer into which the generated secret is written. +// Currently cbAgreedSecret must be 32 for all parameterizations of ML-KEM. +// - (pbCiphertext, cbCiphertext): a buffer into which the encapsulated secret is written. +// cbCiphertext must equal cbCiphertext given by SymCryptMlKemSizeofCiphertextFromParams, +// though typically this value can be known statically (see definition of +// SYMCRYPT_MLKEM_CIPHERTEXT_SIZE_*). 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCompositeMlKemEncapsulateEx( + _In_ PCSYMCRYPT_COMPOSITE_MLKEMKEY pkCompositeMlKemkey, + _In_reads_bytes_opt_( cbMlKemRandom ) PCBYTE pbMlKemRandom, + SIZE_T cbMlKemRandom, + _In_reads_bytes_opt_( cbTradRandom ) PCBYTE pbTradRandom, + SIZE_T cbTradRandom, + _Out_writes_bytes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret, + _Out_writes_bytes_( cbCiphertext ) PBYTE pbCiphertext, + SIZE_T cbCiphertext ); + +// +// Performs the Encapsulate operation of Composite ML-KEM using caller-provided random input. +// It is used in verifying test vectors of Composite ML-KEM. +// +// This uses the public information of a Composite ML-KEM keypair to generate an agreed secret +// and a ciphertext. Only a peer with the private information of a Composite ML-KEM keypair can +// decapsulate the ciphertext to compute the agreed secret. +// +// The arguments are the following: +// - pkCompositeMlKemkey: a key which contains public information required for encapsulation. +// - (pbMlKemRandom, cbMlKemRandom): a buffer containing the input random for the ML-KEM component. +// When pbMlKemRandom is NULL, cbMlKemRandom should be 0, and the function will generate the necessary random input internally. +// Currently when pbMlKemRandom is not NULL, cbMlKemRandom must be 32 for all parameterizations of Composite ML-KEM. +// - (pbTradRandom, cbTradRandom): a buffer containing the input random for the traditional component. +// When the traditional portion is an EC key, cbTradRandom must be equal to the private key size of the EC key. +// If pbTradRandom is NULL, cbTradRandom should be 0, and the function will generate the necessary random input internally. +// Currently, only EC keys are supported for the traditional component. +// - (pbAgreedSecret, cbAgreedSecret): a buffer into which the generated secret is written. +// Currently cbAgreedSecret must be 32 for all parameterizations of Composite ML-KEM. 
+// - (pbCiphertext, cbCiphertext): a buffer into which the encapsulated secret is written. +// cbCiphertext must equal cbCiphertext given by SymCryptCompositeMlKemSizeofCiphertextFromParams, +// though typically this value can be known statically (see definition of +// SYMCRYPT_COMPOSITE_MLKEM_CIPHERTEXT_SIZE_*). +// + +//=================================================================== +// 802.11 SAE protocol +//=================================================================== +// +// WARNING: These functions are NOT part of the stable SymCrypt API. They are a private +// interface for the Windows WiFi driver. These functions can change or disappear +// at any time as we update our WiFi solutions. +// +// These functions implement the non-standard or 'custom' parts of the SAE protocol for +// 802.11 SAE as specified in IEEE 802.11-2016 12.4 +// +// Parts of the protocol that are easy to implement with conventional crypto functions are +// not included in this custom part. +// +// Limitation: The Hunting-and-Pecking method supports only NIST P256 curve. The Hash-to-Element +// method supports curves NIST P256 and NIST P384. +// + +// +// IANA Group numbers identify the elliptic curve and associated parameters to be used in the SAE method. +// +typedef enum _SYMCRYPT_802_11_SAE_GROUP { + SYMCRYPT_SAE_GROUP_19 = 19, // NIST P256 + SYMCRYPT_SAE_GROUP_20, // NIST P384 +} SYMCRYPT_802_11_SAE_GROUP; + +// The sizes of scalars, elliptic curve points, and HMAC outputs will vary depending on which group is selected. +// The following macros define the largest possible sizes supported.
+#define SYMCRYPT_SAE_MAX_MOD_SIZE_BITS 384 +#define SYMCRYPT_SAE_MAX_MOD_SIZE_BYTES SYMCRYPT_BYTES_FROM_BITS( SYMCRYPT_SAE_MAX_MOD_SIZE_BITS ) +#define SYMCRYPT_SAE_MAX_EC_POINT_SIZE_BYTES ( 2 * SYMCRYPT_SAE_MAX_MOD_SIZE_BYTES ) +#define SYMCRYPT_SAE_MAX_HMAC_OUTPUT_SIZE_BYTES SYMCRYPT_BYTES_FROM_BITS( 384 ) + + +typedef struct _SYMCRYPT_802_11_SAE_CUSTOM_STATE SYMCRYPT_802_11_SAE_CUSTOM_STATE, *PSYMCRYPT_802_11_SAE_CUSTOM_STATE; +typedef const SYMCRYPT_802_11_SAE_CUSTOM_STATE *PCSYMCRYPT_802_11_SAE_CUSTOM_STATE; +// +// The struct itself is opaque and is defined elsewhere. +// Caller may not rely on the internal fields of the structure as they can +// change at any time. +// + +VOID SymCrypt802_11SaeGetGroupSizes( + SYMCRYPT_802_11_SAE_GROUP group, + _Out_opt_ SIZE_T* pcbScalar, + _Out_opt_ SIZE_T* pcbPoint ); +// +// Helper function that returns the sizes of the field elements and elliptic curve points in bytes +// for a given IANA group number. Both output parameters are optional. +// + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomInit( + _Out_ PSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _In_reads_( 6 ) PCBYTE pbMacA, + _In_reads_( 6 ) PCBYTE pbMacB, + _In_reads_( cbPassword ) PCBYTE pbPassword, + SIZE_T cbPassword, + _Out_opt_ PBYTE pbCounter, + _Inout_updates_opt_( 32 ) PBYTE pbRand, + _Inout_updates_opt_( 32 ) PBYTE pbMask ); +// +// Initialize the state object with the MAC addresses and password. +// All choices for the protocol (i.e. rand and mask) are made at this time. +// +// - State Protocol state to initialize +// - pbMacA, pbMacB Two 6-byte MAC addresses with MacA >= MacB. +// - pbPassword, cbPassword The password buffer +// - pbCounter If not NULL, receives the counter value of the +// successful PWE generation per section 12.4.4.2.2 +// - pbRand Optional pointer to Rand buffer (see below) +// - pbMask Optional pointer to Mask buffer (see below) +// +// The Rand and Mask buffers are optional. 
If a pointer is not provided then the caller +// has no access to the corresponding value. +// For either of these pointers there are three cases: +// - If a NULL pointer is provided, the function generates an appropriate value internally, +// but does not return it to the caller. +// - If a buffer is provided and the buffer is all-zero, the function generates an appropriate +// value internally and returns it in the buffer. +// - If a buffer is provided and the buffer is nonzero, the value in the buffer is used for +// the corresponding protocol parameter without further validation. +// This last option is useful for testing as it lets the caller specify all the random choices. +// Rand and Mask buffers are MSByte first. +// +// Note: currently this method only supports the NIST P256 curve. If we ever want to support other curves +// we'll update this function to accept a curve parameter and update the SAL annotations +// of the other functions. +// + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCreatePT( + _In_reads_( cbSsid ) PCBYTE pbSsid, + SIZE_T cbSsid, + _In_reads_( cbPassword ) PCBYTE pbPassword, + SIZE_T cbPassword, + _In_reads_opt_( cbPasswordIdentifier ) PCBYTE pbPasswordIdentifier, + SIZE_T cbPasswordIdentifier, + _Out_writes_( 64 ) PBYTE pbPT ); +// +// Generate the PT secret element for use with the SAE Hash-to-Element algorithm, as described in +// section 12.4.4.2.3 of the 802.11 spec ("Hash-to-curve generation of the password element with +// ECC groups"). The PT value can be "stored until needed to generate a session specific PWE." +// +// - pbSsid, cbSsid SSID for the connection as a string of bytes +// - pbPassword, cbPassword Password buffer +// - pbPasswordIdentifier, cbPasswordIdentifier Optional password identifier, as a string of bytes +// - pbPT Out pointer to PT (as a byte buffer) +// +// This function uses the NIST P256 curve. 
+// + + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCreatePTGeneric( + SYMCRYPT_802_11_SAE_GROUP group, + _In_reads_( cbSsid ) PCBYTE pbSsid, + SIZE_T cbSsid, + _In_reads_( cbPassword ) PCBYTE pbPassword, + SIZE_T cbPassword, + _In_reads_opt_( cbPasswordIdentifier ) PCBYTE pbPasswordIdentifier, + SIZE_T cbPasswordIdentifier, + _Out_writes_( cbPT ) PBYTE pbPT, + SIZE_T cbPT ); +// +// Generic version of the SymCrypt802_11SaeCustomCreatePT() function that allows elliptic curve +// group selection. +// Generate the PT secret element for use with the SAE Hash-to-Element algorithm, as described in +// section 12.4.4.2.3 of the 802.11 spec ("Hash-to-curve generation of the password element with +// ECC groups"). The PT value can be "stored until needed to generate a session specific PWE." +// +// - group Group number for the elliptic curve selection +// - pbSsid, cbSsid SSID for the connection as a string of bytes +// - pbPassword, cbPassword Password buffer +// - pbPasswordIdentifier, cbPasswordIdentifier Optional password identifier, as a string of bytes +// - pbPT, cbPT PT (as a byte buffer) +// + + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomInitH2E( + _Out_ PSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _In_reads_( 64 ) PCBYTE pbPT, + _In_reads_( 6 ) PCBYTE pbMacA, + _In_reads_( 6 ) PCBYTE pbMacB, + _Inout_updates_opt_( 32 ) PBYTE pbRand, + _Inout_updates_opt_( 32 ) PBYTE pbMask ); +// +// Initialize the state object using the Hash-to-Element algorithm, using the PT value calculated +// by SymCrypt802_11SaeCustomCreatePT. +// +// - pState Protocol state +// - pbPT PT value calculated using SymCrypt802_11SaeCustomCreatePT() +// - pbMacA, pbMacB Two 6-byte MAC addresses +// - pbRand Optional pointer to Rand buffer. See SymCrypt802_11SaeCustomInit() documentation for the use of this parameter. +// - pbMask Optional pointer to Mask buffer. See SymCrypt802_11SaeCustomInit() documentation for the use of this parameter.
+// +// See the comment on SymCrypt802_11SaeCustomInit() for more details about the pbRand and pbMask +// parameters. +// + + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomInitH2EGeneric( + _Out_ PSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + SYMCRYPT_802_11_SAE_GROUP group, + _In_reads_( cbPT ) PCBYTE pbPT, + SIZE_T cbPT, + _In_reads_( 6 ) PCBYTE pbMacA, + _In_reads_( 6 ) PCBYTE pbMacB, + _Inout_updates_opt_( cbRand ) PBYTE pbRand, + SIZE_T cbRand, + _Inout_updates_opt_( cbMask ) PBYTE pbMask, + SIZE_T cbMask ); +// +// Generic version of the SymCrypt802_11SaeCustomInitH2E() function that allows elliptic curve +// group selection. +// Initialize the state object using the Hash-to-Element algorithm, using the PT value calculated +// by SymCrypt802_11SaeCustomCreatePTGeneric. +// +// - pState Protocol state +// - group Group number for the elliptic curve selection +// - pbPT, cbPT PT value (as a byte array) calculated using SymCrypt802_11SaeCustomCreatePTGeneric(). +// PT must be generated on the same elliptic curve as the one supplied in the group parameter. +// - pbMacA, pbMacB Two 6-byte MAC addresses +// - pbRand, cbRand Optional Rand buffer. See SymCrypt802_11SaeCustomInit() documentation for the use of this parameter. +// - pbMask, cbMask Optional Mask buffer. See SymCrypt802_11SaeCustomInit() documentation for the use of this parameter. +// +// See the comment on SymCrypt802_11SaeCustomInit() for more details about the pbRand and pbMask +// parameters. +// + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCommitCreate( + _In_ PCSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _Out_writes_( 32 ) PBYTE pbCommitScalar, + _Out_writes_( 64 ) PBYTE pbCommitElement ); +// +// Compute the commit-scalar and commit-element values for the Commit message. +// This function does not update the pState and is multi-thread safe w.r.t. the pState object. +// +// - pState Protocol state that was initialized with SymCrypt802_11SaeCustomInit().
+// - pbCommitScalar Buffer that receives the commit-scalar value, MSByte first. +// - pbCommitElement Buffer that receives the commit-element value encoded as two values +// (x,y) in order, each value in MSByte first. +// + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCommitCreateGeneric( + _In_ PCSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _Out_writes_( cbCommitScalar ) PBYTE pbCommitScalar, + SIZE_T cbCommitScalar, + _Out_writes_( cbCommitElement ) PBYTE pbCommitElement, + SIZE_T cbCommitElement); +// +// Generic version of the SymCrypt802_11SaeCustomCommitCreate() function that uses the +// state object to determine which elliptic curve group is selected. +// Compute the commit-scalar and commit-element values for the Commit message. +// This function does not update the pState and is multi-thread safe w.r.t. the pState object. +// +// - pState Protocol state that was initialized with SymCrypt802_11SaeCustomInit(). +// - pbCommitScalar, cbCommitScalar Buffer that receives the commit-scalar value, MSByte first. +// - pbCommitElement, cbCommitElement Buffer that receives the commit-element value encoded as two values +// (x,y) in order, each value in MSByte first. +// + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCommitProcess( + _In_ PCSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _In_reads_( 32 ) PCBYTE pbPeerCommitScalar, + _In_reads_( 64 ) PCBYTE pbPeerCommitElement, + _Out_writes_( 32 ) PBYTE pbSharedSecret, + _Out_writes_( 32 ) PBYTE pbScalarSum ); +// +// Process the commit message received from the peer. +// This function does not update pState and is multi-thread safe w.r.t. the pState object. +// +// - pState pointer to the protocol state. +// - pbPeerCommitScalar pointer to the peer's commit scalar value, MSByte first. +// - pbPeerCommitElement pointer to the peer's commit element, see CommitCreate for format.
+// - pbSharedSecret buffer that receives the 'k' value that is the shared secret, MSByte first +// - pbScalarSum buffer that receives the sum of the two commit scalars, MSByte first +// + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCommitProcessGeneric( + _In_ PCSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _In_reads_( cbPeerCommitScalar ) PCBYTE pbPeerCommitScalar, + SIZE_T cbPeerCommitScalar, + _In_reads_( cbPeerCommitElement ) PCBYTE pbPeerCommitElement, + SIZE_T cbPeerCommitElement, + _Out_writes_( cbSharedSecret ) PBYTE pbSharedSecret, + SIZE_T cbSharedSecret, + _Out_writes_( cbScalarSum ) PBYTE pbScalarSum, + SIZE_T cbScalarSum ); +// +// Generic version of the SymCrypt802_11SaeCustomCommitProcess() function that uses the +// state object to determine which elliptic curve group is selected. +// Process the commit message received from the peer. +// This function does not update pState and is multi-thread safe w.r.t. the pState object. +// +// - pState pointer to the protocol state. +// - pbPeerCommitScalar, cbPeerCommitScalar pointer to the peer's commit scalar value, MSByte first. +// - pbPeerCommitElement, cbPeerCommitElement pointer to the peer's commit element, see CommitCreate for format. +// - pbSharedSecret, cbSharedSecret buffer that receives the 'k' value that is the shared secret, MSByte first +// - pbScalarSum, cbScalarSum buffer that receives the sum of the two commit scalars, MSByte first +// + + +VOID +SymCrypt802_11SaeCustomDestroy( + _Inout_ PSYMCRYPT_802_11_SAE_CUSTOM_STATE pState ); +// +// Wipe a state object. +// After this call the memory used for pState is uninitialized and can be used for other purposes. +// Note that it is not safe to just wipe the memory of the state object as the state +// object contains pointers to other allocations, which can contain secret information. +// The only way to safely destroy a state is to use this function.
+// + +//=================================================================== + + + +#ifdef __cplusplus +} +#endif diff --git a/libs/symcrypt/lib/3des.c b/libs/symcrypt/lib/3des.c new file mode 100644 index 00000000000..6a4091a90ca --- /dev/null +++ b/libs/symcrypt/lib/3des.c @@ -0,0 +1,831 @@ +// +// 3des.c Routines for DES and 3DES +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// This is an updated implementation that is carefully reviewed to be fully copyrighted by +// Microsoft. Our previous implementation was partially based on a very old public domain +// implementation. +// According to Andrew Tucker (atucker) there was a claim many years ago about the copyright of +// our DES code. Working with LCA (legal) it was determined that the DES implementation in RSA32.lib +// was a Microsoft derivative of a public-domain implementation, and therefore is clear of +// any IP issues. To avoid any further claims we have now scrubbed this implementation from all +// copyrightable elements derived from outside sources. +// +// We take non-copyrightable items from the old implementations such as +// - lookup tables +// - algorithm for various bit permutations +// - Any variable & function names that are already MS-generated. +// - Other MS-generated code elements (e.g. SymCrypt integration) +// Some of the considerations we made are: +// Most of the functionality of DES is required by the FIPS standard and there is not much +// choice on how to code it; elements required by the standard are not copyright protected. +// The lookup tables themselves are not copyrightable as they have no artistic expression. +// The format of the lookup tables is almost completely determined by the standard and the algorithm +// used to access them. Any further layout and structure are all standard C conventions. +// Algorithm tricks such as Hoey's IP implementation are not copyrightable but are patentable. 
+// Fortunately all the techniques we use have been around long enough that any patents have expired. +// + +// +// Feb 2018, Niels Ferguson +// + +#include "precomp.h" + +// +// Tables to describe the DES and 3DES block ciphers so that the generic +// chaining mode functions can use them. +// We have no optimized mode-specific code as the DES block is so slow that there is +// very little to be gained. +// + +const SYMCRYPT_BLOCKCIPHER SymCrypt3DesBlockCipher_default = { + SymCrypt3DesExpandKey, // PSYMCRYPT_BLOCKCIPHER_EXPAND_KEY expandKeyFunc; + SymCrypt3DesEncrypt, // PSYMCRYPT_BLOCKCIPHER_CRYPT encryptFunc; + SymCrypt3DesDecrypt, // PSYMCRYPT_BLOCKCIPHER_CRYPT decryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB ecbEncryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB ecbDecryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE cbcEncryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE cbcDecryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_MAC_MODE cbcMacFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE ctrMsbFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmEncryptPartFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmDecryptPartFunc; + 8, // SIZE_T blockSize; + sizeof( SYMCRYPT_3DES_EXPANDED_KEY ), // SIZE_T expandedKeySize; // = sizeof( SYMCRYPT_XXX_EXPANDED_KEY ) +}; + +const SYMCRYPT_BLOCKCIPHER SymCryptDesBlockCipher_default = { + SymCryptDesExpandKey, // PSYMCRYPT_BLOCKCIPHER_EXPAND_KEY expandKeyFunc; + SymCryptDesEncrypt, // PSYMCRYPT_BLOCKCIPHER_CRYPT encryptFunc; + SymCryptDesDecrypt, // PSYMCRYPT_BLOCKCIPHER_CRYPT decryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB ecbEncryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB ecbDecryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE cbcEncryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE cbcDecryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_MAC_MODE cbcMacFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE ctrMsbFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmEncryptPartFunc; + 
NULL, // PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmDecryptPartFunc; + 8, // SIZE_T blockSize; + sizeof( SYMCRYPT_DES_EXPANDED_KEY ), // SIZE_T expandedKeySize; // = sizeof( SYMCRYPT_XXX_EXPANDED_KEY ) +}; + +const PCSYMCRYPT_BLOCKCIPHER SymCrypt3DesBlockCipher = &SymCrypt3DesBlockCipher_default; +const PCSYMCRYPT_BLOCKCIPHER SymCryptDesBlockCipher = &SymCryptDesBlockCipher_default; + +extern SYMCRYPT_ALIGN_AT(256) const UINT32 SymCryptDesSpbox[8][64]; // Combined S and P tables +extern SYMCRYPT_ALIGN_AT(256) const UINT32 SymCryptDesKeySelect[8][64]; + + +// +// The SWAP_BITS_WITHIN_UINT32 macro swaps bits within a UINT32 value +// SWAP_BITS_WITHIN_UINT32( _value, _shift, _mask ) +// swaps each bit in _value selected by _mask with the bit _shift positions to the left +// Thus it swaps (_value & _mask) with (_value & (_mask << _shift)) +// + +#define SWAP_BITS_WITHIN_UINT32( _value, _shift, _mask ) \ +{ \ + UINT32 _tmp; \ + _tmp = ((_value) ^ ((_value) >> (_shift))) & (_mask ); \ + _value = (_value) ^ _tmp ^ (_tmp << (_shift)); \ +} + +// +// The SWAP_BITS_BETWEEN_UINT32 macro swaps bits between two UINT32 values +// SWAP_BITS_BETWEEN_UINT32( _v1, _v2, _shift, _mask ) +// swaps bits in _v1 selected by _mask with bits in _v2 selected by _mask << _shift +// + +#define SWAP_BITS_BETWEEN_UINT32( _v1, _v2, _shift, _mask ) \ +{ \ + UINT32 _tmp; \ + _tmp = ((_v1) ^ ((_v2) >> (_shift))) & (_mask); \ + _v1 ^= _tmp; \ + _v2 ^= (_tmp << (_shift )); \ +} + +// +// For each round, a bit that states whether the key schedule shift registers are clocked twice +// The data is straight from the standard. +// +static const BYTE SymCryptDesDoubleShift[16]={0,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0}; + +////////////////////////// +// DES +// We just implement DES as 3DES. +// People using DES have bigger problems than bad performance. 
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDesExpandKey( + _Out_ PSYMCRYPT_DES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey ) +{ + if( cbKey != 8 ) + { + // + // Single DES takes exactly one 8-byte key. cbKey should be a compile-time + // constant in most cases, so this check should be optimized away + // + return SYMCRYPT_WRONG_KEY_SIZE; + } + return SymCrypt3DesExpandKey( &pExpandedKey->threeDes, pbKey, cbKey ); +} + +VOID +SYMCRYPT_CALL +SymCryptDesEncrypt( + _In_ PCSYMCRYPT_DES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_DES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_DES_BLOCK_SIZE ) PBYTE pbDst ) +{ + SymCrypt3DesEncrypt( &pExpandedKey->threeDes, pbSrc, pbDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptDesDecrypt( + _In_ PCSYMCRYPT_DES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_DES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_DES_BLOCK_SIZE ) PBYTE pbDst ) +{ + SymCrypt3DesDecrypt( &pExpandedKey->threeDes, pbSrc, pbDst ); +} + +// +// The 3DesCbcEncrypt/Decrypt functions are used to make converting code from +// older libraries to SymCrypt easier. 
+// + +VOID +SYMCRYPT_CALL +SymCrypt3DesCbcEncrypt( + _In_ PCSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_3DES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SYMCRYPT_ASSERT( SymCrypt3DesBlockCipher->blockSize == SYMCRYPT_3DES_BLOCK_SIZE ); + SymCryptCbcEncrypt( SymCrypt3DesBlockCipher, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +} + +VOID +SYMCRYPT_CALL +SymCrypt3DesCbcDecrypt( + _In_ PCSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_3DES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SYMCRYPT_ASSERT( SymCrypt3DesBlockCipher->blockSize == SYMCRYPT_3DES_BLOCK_SIZE ); + SymCryptCbcDecrypt( SymCrypt3DesBlockCipher, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +} + + + +VOID +SYMCRYPT_CALL +SymCryptDesExpandSingleKey( + _Out_writes_bytes_(128) UINT32 expandedKeyTable[16][2], + _In_reads_(8) PCBYTE pKey ) +{ + UINT32 Cr, Dr; // The C_r D_r values of FIPS 46 for round value r + UINT32 r; // round + UINT32 K1, K2; // round keys after the permuted choice 2 + UINT32 tmp; + + // + // We follow the FIPS 46 flow quite closely and have not optimized the key expansion much. + // Key expansion is not performance-critical. + // + + // Load the key + Cr = SYMCRYPT_LOAD_LSBFIRST32( pKey ); + Dr = SYMCRYPT_LOAD_LSBFIRST32( pKey + 4 ); + + // + // The Permuted Choice 1 can be done mostly with a sequence of bit swaps. + // The algorithm we use is derived from our earlier implementation and might potentially + // derive from an external source. + // But the algorithm cannot be copyrighted, only patented, and if there were any patents + // they have expired by now. + // The expression of the algorithm in code is purely MS generated, and so not encumbered + // by external copyrights. 
+ // This algorithm is really just a transposition of the bits when viewed as an 8x8 matrix + // with an additional permutation on the output side. + // + SWAP_BITS_BETWEEN_UINT32( Cr, Dr, 4, 0x0f0f0f0f ); + SWAP_BITS_WITHIN_UINT32( Dr, 18, 0x00003333 ); + SWAP_BITS_WITHIN_UINT32( Cr, 18, 0x00003333 ); + SWAP_BITS_BETWEEN_UINT32( Cr, Dr, 1, 0x55555555 ); + SWAP_BITS_BETWEEN_UINT32( Dr, Cr, 8, 0x00ff00ff ); + SWAP_BITS_BETWEEN_UINT32( Cr, Dr, 1, 0x55555555 ); + SWAP_BITS_WITHIN_UINT32( Dr, 16, 0xff ); + + // Have to re-arrange C and D a tiny bit so that each contains 28 bits and we throw away 8 bits + Dr = (Dr & 0x00ffffff) | ((Cr & 0xf0000000 ) >> 4 ); + Cr = (Cr & 0x0fffffff); + + for( r = 0; r < 16; r++) + { + // + // Cr and Dr are the two key shift registers, they are rotated once or twice for each round. + // + + if( SymCryptDesDoubleShift[ r ] ) { + Cr = ((Cr >> 2) | (Cr << 26)); + Dr = ((Dr >> 2) | (Dr << 26)); + } else { + Cr = ((Cr >> 1) | (Cr << 27)); + Dr = ((Dr >> 1) | (Dr << 27)); + } + + Cr &= 0x0fffffff; + Dr &= 0x0fffffff; + + // + // The Permuted Choice 2 is done using table lookups + // Not all bits of C and D are used, so we cut those out using shifts and masks, + // and then index 6 bits at a time into lookup tables that implement the bit relocation. 
+ // + + K1 = SymCryptDesKeySelect[0][ (Cr )&0x3f ] | + SymCryptDesKeySelect[1][((Cr >> 6)&0x03) | ((Cr >> 7)&0x3c)] | + SymCryptDesKeySelect[2][((Cr >> 13)&0x0f) | ((Cr >> 14)&0x30)] | + SymCryptDesKeySelect[3][((Cr >> 20)&0x01) | ((Cr >> 21)&0x06) | ((Cr >> 22)&0x38)]; + + K2 = SymCryptDesKeySelect[4][ (Dr )&0x3f ] | + SymCryptDesKeySelect[5][((Dr >> 7)&0x03) | ((Dr >> 8)&0x3c)] | + SymCryptDesKeySelect[6][ (Dr >> 15)&0x3f ] | + SymCryptDesKeySelect[7][((Dr >> 21)&0x0f) | ((Dr >> 22)&0x30)]; + + // + // After this we still have to swap the halves of K1 and K2, that is done below + // as part of the formatting of the round key + // + + // + // So far we have recreated the round keys per the standard. + // The round keys are stored rotated by 2 as the encrypt/decrypt code finds that easier. + // We could update the tables to do this, but key expansion is not used that frequently, + // and it is not worth the effort to update the tables. + // + // We don't worry about extraneous bits in unused positions as the F function masks out unused bits. + // + + tmp = ((K2 << 16) | (K1 & 0x0000ffff)) ; + expandedKeyTable[r][0] = ROL32(tmp, 2); + + tmp = ((K1 >> 16) | (K2 & 0xffff0000)); + expandedKeyTable[r][1] = ROL32(tmp, 6); + } +} + + +SYMCRYPT_NOINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCrypt3DesExpandKey( + _Out_ PSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ) +{ + SIZE_T keyIndex = 0; + int i; + + if( cbKey != 8 && cbKey != 16 && cbKey != 24 ) + { + return SYMCRYPT_WRONG_KEY_SIZE; + } + + // + // A loop that goes over the provided key as a circular buffer provides + // the right result with the least complexity. + // This is inefficient for the cases cbKey=8 and cbKey=16, but those should + // not be used anyway. 
+ // + for( i=0; i<3; i++ ) + { + SYMCRYPT_ASSERT( keyIndex <= cbKey - 8 ); // help PreFast + SymCryptDesExpandSingleKey( pExpandedKey->roundKey[i], pbKey + keyIndex ); + keyIndex = (keyIndex + 8) % cbKey; + } + + SYMCRYPT_SET_MAGIC( pExpandedKey ); + + return SYMCRYPT_NO_ERROR; +} + + +// +// The DES round function +// This is straight from the standard. +// Ta and Tb each contain 4 sets of 6 bits that are S-box inputs +// We interleave the use of Ta and Tb to provide better CPU scheduling on weak compilers. +// We ensure that the input bits appear in bits 2-7 of the index to avoid a scaled index +// which can be slower on some CPUs. +// +#define F(L, R, keyptr) { \ + Ta = keyptr[0] ^ R; \ + Tb = keyptr[1] ^ R; \ + Tb = ROR32(Tb, 4); \ + L ^= *(UINT32 *)((PBYTE)SymCryptDesSpbox[0] + ( Ta & 0xfc)); \ + L ^= *(UINT32 *)((PBYTE)SymCryptDesSpbox[1] + ( Tb & 0xfc)); \ + L ^= *(UINT32 *)((PBYTE)SymCryptDesSpbox[2] + ((Ta>> 8)& 0xfc)); \ + L ^= *(UINT32 *)((PBYTE)SymCryptDesSpbox[3] + ((Tb>> 8)& 0xfc)); \ + L ^= *(UINT32 *)((PBYTE)SymCryptDesSpbox[4] + ((Ta>>16)& 0xfc)); \ + L ^= *(UINT32 *)((PBYTE)SymCryptDesSpbox[5] + ((Tb>>16)& 0xfc)); \ + L ^= *(UINT32 *)((PBYTE)SymCryptDesSpbox[6] + ((Ta>>24)& 0xfc)); \ + L ^= *(UINT32 *)((PBYTE)SymCryptDesSpbox[7] + ((Tb>>24)& 0xfc)); } + + + +// +// Block encryption. +// The noinline stops the compiler from inlining the code and creating additional +// implementations which would require separate FIPS selftests. 
+// +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCrypt3DesEncrypt( + _In_ PCSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_3DES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_3DES_BLOCK_SIZE ) PBYTE pbDst ) +{ + UINT32 L, R, Ta, Tb; + int r; + + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + R = SYMCRYPT_LOAD_LSBFIRST32( pbSrc ); + L = SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 4 ); + + // + // Hoey's wonderful initial permutation algorithm, from Outerbridge + // (see Schneier p 478) + // + // The algorithm we use is derived (through several intermediate forms) from the mentioned source. + // But the algorithm cannot be copyrighted, only patented, and if there were any patents + // they have expired by now. + // The expression of the algorithm in code is purely MS generated, + // within the confines of implementing the algorithm in the best way such that even a simple + // compiler will create good code. + // + + R = ROL32(R, 4); + Ta = (L ^ R) & 0xf0f0f0f0; + L ^= Ta; + R ^= Ta; + + L = ROL32(L, 20); + Ta = (L ^ R) & 0xfff0000f; + R ^= Ta; + L ^= Ta; + + L = ROL32(L,14); + Ta = (L ^ R) & 0x33333333; + R ^= Ta; + L ^= Ta; + + R = ROL32(R, 22); + Ta = (L ^ R) & 0x03fc03fc; + R ^= Ta; + L ^= Ta; + + R = ROL32(R, 9); + Ta = (L ^ R) & 0xaaaaaaaa; + R ^= Ta; + L ^= Ta; + + L = ROL32(L, 1); + + // + // First: encryption + // + for( r=0; r<16; r += 2 ) + { + F( L, R, pExpandedKey->roundKey[0][r ] ); + F( R, L, pExpandedKey->roundKey[0][r+1] ); + } + + // + // Second: decryption + // Note that L and R are swapped here, and the round counter counts down. 
+ // + for( r=14; r>=0; r -= 2 ) + { + F( R, L, pExpandedKey->roundKey[1][r+1] ); + F( L, R, pExpandedKey->roundKey[1][r ] ); + } + + // + // Third: encryption + // + for( r=0; r<16; r += 2 ) + { + F( L, R, pExpandedKey->roundKey[2][r ] ); + F( R, L, pExpandedKey->roundKey[2][r+1] ); + } + + R = ROR32(R, 1); + Ta = (L ^ R) & 0xaaaaaaaa; + R ^= Ta; + L ^= Ta; + + L = ROR32(L, 9); + Ta = (L ^ R) & 0x03fc03fc; + R ^= Ta; + L ^= Ta; + + L = ROR32(L, 22); + Ta = (L ^ R) & 0x33333333; + R ^= Ta; + L ^= Ta; + + R = ROR32(R, 14); + Ta = (L ^ R) & 0xfff0000f; + R ^= Ta; + L ^= Ta; + + R = ROR32(R, 20); + Ta = (L ^ R) & 0xf0f0f0f0; + R ^= Ta; + L ^= Ta; + + L = ROR32(L, 4); + + SYMCRYPT_STORE_LSBFIRST32( pbDst, L ); + SYMCRYPT_STORE_LSBFIRST32( pbDst + 4, R ); +} + + +// +// Block decrypt +// The noinline stops the compiler from inlining the code and creating additional +// implementations which would require separate FIPS selftests. +// +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCrypt3DesDecrypt( + _In_ PCSYMCRYPT_3DES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_3DES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_3DES_BLOCK_SIZE ) PBYTE pbDst ) +{ + UINT32 L, R, Ta, Tb; + int r; + + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + R = SYMCRYPT_LOAD_LSBFIRST32( pbSrc ); + L = SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 4 ); + + R = ROL32(R, 4); + Ta = (L ^ R) & 0xf0f0f0f0; + L ^= Ta; + R ^= Ta; + + L = ROL32(L, 20); + Ta = (L ^ R) & 0xfff0000f; + L ^= Ta; + R ^= Ta; + + L = ROL32(L, 14); + Ta = (L ^ R) & 0x33333333; + L ^= Ta; + R ^= Ta; + + R = ROL32(R, 22); + Ta = (L ^ R) & 0x03fc03fc; + L ^= Ta; + R ^= Ta; + + R = ROL32(R, 9); + Ta = (L ^ R) & 0xaaaaaaaa; + L ^= Ta; + R ^= Ta; + + L = ROL32(L, 1); + + + // Decrypt with key 2 + for( r=14; r>=0; r -= 2 ) + { + F( L, R, pExpandedKey->roundKey[2][r+1] ); + F( R, L, pExpandedKey->roundKey[2][r ] ); + } + + // Encrypt with key 1 + for( r=0; r<16; r += 2 ) + { + F( R, L, pExpandedKey->roundKey[1][r ] ); + F( L, R, 
pExpandedKey->roundKey[1][r+1] ); + } + + // Decrypt with key 0 + for( r=14; r>=0; r -= 2 ) + { + F( L, R, pExpandedKey->roundKey[0][r+1] ); + F( R, L, pExpandedKey->roundKey[0][r ] ); + } + + /* Inverse permutation, also from Hoey via Outerbridge and Schneier */ + + R = ROR32(R, 1); + Ta = (L ^ R) & 0xaaaaaaaa; + L ^= Ta; + R ^= Ta; + + L = ROR32(L, 9); + Ta = (L ^ R) & 0x03fc03fc; + L ^= Ta; + R ^= Ta; + + L = ROR32(L, 22); + Ta = (L ^ R) & 0x33333333; + L ^= Ta; + R ^= Ta; + + R = ROR32(R, 14); + Ta = (L ^ R) & 0xfff0000f; + L ^= Ta; + R ^= Ta; + + R = ROR32(R, 20); + Ta = (L ^ R) & 0xf0f0f0f0; + L ^= Ta; + R ^= Ta; + + L = ROR32(L, 4); + + SYMCRYPT_STORE_LSBFIRST32( pbDst, L ); + SYMCRYPT_STORE_LSBFIRST32( pbDst + 4, R ); +} + + + +VOID +SYMCRYPT_CALL +SymCryptDesSetOddParity( + _Inout_updates_( cbData ) PBYTE pbData, + _In_ SIZE_T cbData ) +// +// For each byte, set bit 0 such that the byte parity is odd. +// This function is side-channel safe +// +{ + SIZE_T i; + BYTE b, t; + for( i=0; i<cbData; i++ ) + { + // We obey the read-once write-once rule + b = *pbData; + + t = b ^ (b>>4); // parity(b) = parity( t & 0xf ) + t ^= t>>2; // = parity( t & 0x3 ) + t ^= t>>1; // = parity( t & 0x1 ) + *pbData++ = b ^ (t&1) ^ 1; + } +} + + +// +// Test vectors for self test +// +static const BYTE SP800_67Key[24] = { + 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, + 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01, + 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 0x01, 0x23, +}; + +static const BYTE des3KnownPlaintext[8] = { + 0x4E, 0x6F, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, +}; + +static const BYTE des3KnownCiphertext[8] = { + 0x31, 0x4F, 0x83, 0x27, 0xFA, 0x7A, 0x09, 0xA8, +}; + +static const BYTE desKnownCiphertext[8] = { + 0x3F, 0xA4, 0x0E, 0x8A, 0x98, 0x4D, 0x48, 0x15, +}; + + +VOID +SYMCRYPT_CALL +SymCryptDesSelftest(void) +{ + BYTE buf[SYMCRYPT_DES_BLOCK_SIZE]; + SYMCRYPT_DES_EXPANDED_KEY key; + + if( SymCryptDesExpandKey( &key, SP800_67Key, 8 ) != SYMCRYPT_NO_ERROR ) + { + 
SymCryptFatal( 'desa' ); + } + + SymCryptDesEncrypt( &key, des3KnownPlaintext, buf ); + + SymCryptInjectError( buf, SYMCRYPT_DES_BLOCK_SIZE ); + + if( memcmp( buf, desKnownCiphertext, SYMCRYPT_DES_BLOCK_SIZE ) != 0 ) + { + SymCryptFatal( 'desb' ); + } + + SymCryptDesDecrypt( &key, desKnownCiphertext, buf ); + + SymCryptInjectError( buf, SYMCRYPT_DES_BLOCK_SIZE ); + + if( memcmp( buf, des3KnownPlaintext, SYMCRYPT_DES_BLOCK_SIZE ) != 0 ) + { + SymCryptFatal( 'desc' ); + } +} + + +VOID +SYMCRYPT_CALL +SymCrypt3DesSelftest(void) +{ + BYTE buf[SYMCRYPT_3DES_BLOCK_SIZE]; + SYMCRYPT_3DES_EXPANDED_KEY key; + + if( SymCrypt3DesExpandKey( &key, SP800_67Key, 24 ) != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'des3' ); + } + + SymCrypt3DesEncrypt( &key, des3KnownPlaintext, buf ); + + SymCryptInjectError( buf, SYMCRYPT_3DES_BLOCK_SIZE ); + + if( memcmp( buf, des3KnownCiphertext, SYMCRYPT_3DES_BLOCK_SIZE ) != 0 ) + { + SymCryptFatal( 'des4' ); + } + + SymCrypt3DesDecrypt( &key, des3KnownCiphertext, buf ); + + SymCryptInjectError( buf, SYMCRYPT_3DES_BLOCK_SIZE ); + + if( memcmp( buf, des3KnownPlaintext, SYMCRYPT_3DES_BLOCK_SIZE ) != 0 ) + { + SymCryptFatal( 'des5' ); + } +} + + + + + +#if 0 +//////////////////////////////////////////// +// Useful tables, kept for future reference. +// + +// Tables defined in the Data Encryption Standard documents +// Three of these tables, the initial permutation, the final +// permutation and the expansion operator, are regular enough that +// for speed, we hard-code them. They're here for reference only. +// Also, the S and P boxes are used by a separate program, gensp.c, +// to build the combined SP box, Spbox[]. They're also here just +// for reference. 
+// +// initial permutation IP +static unsigned BYTE ip[] = { + 58, 50, 42, 34, 26, 18, 10, 2, + 60, 52, 44, 36, 28, 20, 12, 4, + 62, 54, 46, 38, 30, 22, 14, 6, + 64, 56, 48, 40, 32, 24, 16, 8, + 57, 49, 41, 33, 25, 17, 9, 1, + 59, 51, 43, 35, 27, 19, 11, 3, + 61, 53, 45, 37, 29, 21, 13, 5, + 63, 55, 47, 39, 31, 23, 15, 7 +}; + +// final permutation IP^-1 +static unsigned BYTE fp[] = { + 40, 8, 48, 16, 56, 24, 64, 32, + 39, 7, 47, 15, 55, 23, 63, 31, + 38, 6, 46, 14, 54, 22, 62, 30, + 37, 5, 45, 13, 53, 21, 61, 29, + 36, 4, 44, 12, 52, 20, 60, 28, + 35, 3, 43, 11, 51, 19, 59, 27, + 34, 2, 42, 10, 50, 18, 58, 26, + 33, 1, 41, 9, 49, 17, 57, 25 +}; + +// expansion operation matrix +static unsigned BYTE ei[] = { + 32, 1, 2, 3, 4, 5, + 4, 5, 6, 7, 8, 9, + 8, 9, 10, 11, 12, 13, + 12, 13, 14, 15, 16, 17, + 16, 17, 18, 19, 20, 21, + 20, 21, 22, 23, 24, 25, + 24, 25, 26, 27, 28, 29, + 28, 29, 30, 31, 32, 1 +}; + +// The (in)famous S-boxes +static unsigned BYTE sbox[8][64] = { + // S1 + 14, 4, 13, 1, 2, 15, 11, 8, 3, 10, 6, 12, 5, 9, 0, 7, + 0, 15, 7, 4, 14, 2, 13, 1, 10, 6, 12, 11, 9, 5, 3, 8, + 4, 1, 14, 8, 13, 6, 2, 11, 15, 12, 9, 7, 3, 10, 5, 0, + 15, 12, 8, 2, 4, 9, 1, 7, 5, 11, 3, 14, 10, 0, 6, 13, + + // S2 + 15, 1, 8, 14, 6, 11, 3, 4, 9, 7, 2, 13, 12, 0, 5, 10, + 3, 13, 4, 7, 15, 2, 8, 14, 12, 0, 1, 10, 6, 9, 11, 5, + 0, 14, 7, 11, 10, 4, 13, 1, 5, 8, 12, 6, 9, 3, 2, 15, + 13, 8, 10, 1, 3, 15, 4, 2, 11, 6, 7, 12, 0, 5, 14, 9, + + // S3 + 10, 0, 9, 14, 6, 3, 15, 5, 1, 13, 12, 7, 11, 4, 2, 8, + 13, 7, 0, 9, 3, 4, 6, 10, 2, 8, 5, 14, 12, 11, 15, 1, + 13, 6, 4, 9, 8, 15, 3, 0, 11, 1, 2, 12, 5, 10, 14, 7, + 1, 10, 13, 0, 6, 9, 8, 7, 4, 15, 14, 3, 11, 5, 2, 12, + + // S4 + 7, 13, 14, 3, 0, 6, 9, 10, 1, 2, 8, 5, 11, 12, 4, 15, + 13, 8, 11, 5, 6, 15, 0, 3, 4, 7, 2, 12, 1, 10, 14, 9, + 10, 6, 9, 0, 12, 11, 7, 13, 15, 1, 3, 14, 5, 2, 8, 4, + 3, 15, 0, 6, 10, 1, 13, 8, 9, 4, 5, 11, 12, 7, 2, 14, + + // S5 + 2, 12, 4, 1, 7, 10, 11, 6, 8, 5, 3, 15, 13, 0, 14, 9, + 14, 11, 2, 12, 
4, 7, 13, 1, 5, 0, 15, 10, 3, 9, 8, 6, + 4, 2, 1, 11, 10, 13, 7, 8, 15, 9, 12, 5, 6, 3, 0, 14, + 11, 8, 12, 7, 1, 14, 2, 13, 6, 15, 0, 9, 10, 4, 5, 3, + + // S6 + 12, 1, 10, 15, 9, 2, 6, 8, 0, 13, 3, 4, 14, 7, 5, 11, + 10, 15, 4, 2, 7, 12, 9, 5, 6, 1, 13, 14, 0, 11, 3, 8, + 9, 14, 15, 5, 2, 8, 12, 3, 7, 0, 4, 10, 1, 13, 11, 6, + 4, 3, 2, 12, 9, 5, 15, 10, 11, 14, 1, 7, 6, 0, 8, 13, + + // S7 + 4, 11, 2, 14, 15, 0, 8, 13, 3, 12, 9, 7, 5, 10, 6, 1, + 13, 0, 11, 7, 4, 9, 1, 10, 14, 3, 5, 12, 2, 15, 8, 6, + 1, 4, 11, 13, 12, 3, 7, 14, 10, 15, 6, 8, 0, 5, 9, 2, + 6, 11, 13, 8, 1, 4, 10, 7, 9, 5, 0, 15, 14, 2, 3, 12, + + // S8 + 13, 2, 8, 4, 6, 15, 11, 1, 10, 9, 3, 14, 5, 0, 12, 7, + 1, 15, 13, 8, 10, 3, 7, 4, 12, 5, 6, 11, 0, 14, 9, 2, + 7, 11, 4, 1, 9, 12, 14, 2, 0, 6, 10, 13, 15, 3, 5, 8, + 2, 1, 14, 7, 4, 10, 8, 13, 15, 12, 9, 0, 3, 5, 6, 11 +}; + +// 32-bit permutation function P used on the output of the S-boxes +static unsigned BYTE p32i[] = { + 16, 7, 20, 21, + 29, 12, 28, 17, + 1, 15, 23, 26, + 5, 18, 31, 10, + 2, 8, 24, 14, + 32, 27, 3, 9, + 19, 13, 30, 6, + 22, 11, 4, 25 +}; + +// permuted choice table (key) +static unsigned BYTE pc1[] = { + 57, 49, 41, 33, 25, 17, 9, + 1, 58, 50, 42, 34, 26, 18, + 10, 2, 59, 51, 43, 35, 27, + 19, 11, 3, 60, 52, 44, 36, + + 63, 55, 47, 39, 31, 23, 15, + 7, 62, 54, 46, 38, 30, 22, + 14, 6, 61, 53, 45, 37, 29, + 21, 13, 5, 28, 20, 12, 4 +}; + +// number left rotations of pc1 +static unsigned BYTE totrot[] = { + 1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28 +}; + +// permuted choice key (table) +static unsigned BYTE pc2[] = { + 14, 17, 11, 24, 1, 5, + 3, 28, 15, 6, 21, 10, + 23, 19, 12, 4, 26, 8, + 16, 7, 27, 20, 13, 2, + 41, 52, 31, 37, 47, 55, + 30, 40, 51, 45, 33, 48, + 44, 49, 39, 56, 34, 53, + 46, 42, 50, 36, 29, 32 +}; + +#endif diff --git a/libs/symcrypt/lib/AesTables.c b/libs/symcrypt/lib/AesTables.c new file mode 100644 index 00000000000..ceda0c8a342 --- /dev/null +++ b/libs/symcrypt/lib/AesTables.c @@ -0,0 +1,899 @@ +// 
+// AesTables.c lookup tables for the AES implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// +// We put these in a separate source file to keep the code file uncluttered. +// + +#include "precomp.h" + +// +// Alignments are chosen to reduce side-channel attacks through the TLB cache. +// We align each table to a multiple of the size within which we do data-dependent +// lookups. For example, the table below is aligned to 1024. It is not a secret +// that the 4 sub-tables are accessed, but which value inside each sub-table is a secret. +// Aligning to 1024 still leaves the data cache line leakage, but avoids any TLB-related leakage. +// +SYMCRYPT_ALIGN_AT(1024) const BYTE SymCryptAesSboxMatrixMult[4][256][4] = {{ // Main encryption tables +0xc6,0x63,0x63,0xa5, 0xf8,0x7c,0x7c,0x84, 0xee,0x77,0x77,0x99, 0xf6,0x7b,0x7b,0x8d, +0xff,0xf2,0xf2,0x0d, 0xd6,0x6b,0x6b,0xbd, 0xde,0x6f,0x6f,0xb1, 0x91,0xc5,0xc5,0x54, +0x60,0x30,0x30,0x50, 0x02,0x01,0x01,0x03, 0xce,0x67,0x67,0xa9, 0x56,0x2b,0x2b,0x7d, +0xe7,0xfe,0xfe,0x19, 0xb5,0xd7,0xd7,0x62, 0x4d,0xab,0xab,0xe6, 0xec,0x76,0x76,0x9a, +0x8f,0xca,0xca,0x45, 0x1f,0x82,0x82,0x9d, 0x89,0xc9,0xc9,0x40, 0xfa,0x7d,0x7d,0x87, +0xef,0xfa,0xfa,0x15, 0xb2,0x59,0x59,0xeb, 0x8e,0x47,0x47,0xc9, 0xfb,0xf0,0xf0,0x0b, +0x41,0xad,0xad,0xec, 0xb3,0xd4,0xd4,0x67, 0x5f,0xa2,0xa2,0xfd, 0x45,0xaf,0xaf,0xea, +0x23,0x9c,0x9c,0xbf, 0x53,0xa4,0xa4,0xf7, 0xe4,0x72,0x72,0x96, 0x9b,0xc0,0xc0,0x5b, +0x75,0xb7,0xb7,0xc2, 0xe1,0xfd,0xfd,0x1c, 0x3d,0x93,0x93,0xae, 0x4c,0x26,0x26,0x6a, +0x6c,0x36,0x36,0x5a, 0x7e,0x3f,0x3f,0x41, 0xf5,0xf7,0xf7,0x02, 0x83,0xcc,0xcc,0x4f, +0x68,0x34,0x34,0x5c, 0x51,0xa5,0xa5,0xf4, 0xd1,0xe5,0xe5,0x34, 0xf9,0xf1,0xf1,0x08, +0xe2,0x71,0x71,0x93, 0xab,0xd8,0xd8,0x73, 0x62,0x31,0x31,0x53, 0x2a,0x15,0x15,0x3f, +0x08,0x04,0x04,0x0c, 0x95,0xc7,0xc7,0x52, 0x46,0x23,0x23,0x65, 0x9d,0xc3,0xc3,0x5e, +0x30,0x18,0x18,0x28, 0x37,0x96,0x96,0xa1, 0x0a,0x05,0x05,0x0f, 0x2f,0x9a,0x9a,0xb5, 
+0x0e,0x07,0x07,0x09, 0x24,0x12,0x12,0x36, 0x1b,0x80,0x80,0x9b, 0xdf,0xe2,0xe2,0x3d, +0xcd,0xeb,0xeb,0x26, 0x4e,0x27,0x27,0x69, 0x7f,0xb2,0xb2,0xcd, 0xea,0x75,0x75,0x9f, +0x12,0x09,0x09,0x1b, 0x1d,0x83,0x83,0x9e, 0x58,0x2c,0x2c,0x74, 0x34,0x1a,0x1a,0x2e, +0x36,0x1b,0x1b,0x2d, 0xdc,0x6e,0x6e,0xb2, 0xb4,0x5a,0x5a,0xee, 0x5b,0xa0,0xa0,0xfb, +0xa4,0x52,0x52,0xf6, 0x76,0x3b,0x3b,0x4d, 0xb7,0xd6,0xd6,0x61, 0x7d,0xb3,0xb3,0xce, +0x52,0x29,0x29,0x7b, 0xdd,0xe3,0xe3,0x3e, 0x5e,0x2f,0x2f,0x71, 0x13,0x84,0x84,0x97, +0xa6,0x53,0x53,0xf5, 0xb9,0xd1,0xd1,0x68, 0x00,0x00,0x00,0x00, 0xc1,0xed,0xed,0x2c, +0x40,0x20,0x20,0x60, 0xe3,0xfc,0xfc,0x1f, 0x79,0xb1,0xb1,0xc8, 0xb6,0x5b,0x5b,0xed, +0xd4,0x6a,0x6a,0xbe, 0x8d,0xcb,0xcb,0x46, 0x67,0xbe,0xbe,0xd9, 0x72,0x39,0x39,0x4b, +0x94,0x4a,0x4a,0xde, 0x98,0x4c,0x4c,0xd4, 0xb0,0x58,0x58,0xe8, 0x85,0xcf,0xcf,0x4a, +0xbb,0xd0,0xd0,0x6b, 0xc5,0xef,0xef,0x2a, 0x4f,0xaa,0xaa,0xe5, 0xed,0xfb,0xfb,0x16, +0x86,0x43,0x43,0xc5, 0x9a,0x4d,0x4d,0xd7, 0x66,0x33,0x33,0x55, 0x11,0x85,0x85,0x94, +0x8a,0x45,0x45,0xcf, 0xe9,0xf9,0xf9,0x10, 0x04,0x02,0x02,0x06, 0xfe,0x7f,0x7f,0x81, +0xa0,0x50,0x50,0xf0, 0x78,0x3c,0x3c,0x44, 0x25,0x9f,0x9f,0xba, 0x4b,0xa8,0xa8,0xe3, +0xa2,0x51,0x51,0xf3, 0x5d,0xa3,0xa3,0xfe, 0x80,0x40,0x40,0xc0, 0x05,0x8f,0x8f,0x8a, +0x3f,0x92,0x92,0xad, 0x21,0x9d,0x9d,0xbc, 0x70,0x38,0x38,0x48, 0xf1,0xf5,0xf5,0x04, +0x63,0xbc,0xbc,0xdf, 0x77,0xb6,0xb6,0xc1, 0xaf,0xda,0xda,0x75, 0x42,0x21,0x21,0x63, +0x20,0x10,0x10,0x30, 0xe5,0xff,0xff,0x1a, 0xfd,0xf3,0xf3,0x0e, 0xbf,0xd2,0xd2,0x6d, +0x81,0xcd,0xcd,0x4c, 0x18,0x0c,0x0c,0x14, 0x26,0x13,0x13,0x35, 0xc3,0xec,0xec,0x2f, +0xbe,0x5f,0x5f,0xe1, 0x35,0x97,0x97,0xa2, 0x88,0x44,0x44,0xcc, 0x2e,0x17,0x17,0x39, +0x93,0xc4,0xc4,0x57, 0x55,0xa7,0xa7,0xf2, 0xfc,0x7e,0x7e,0x82, 0x7a,0x3d,0x3d,0x47, +0xc8,0x64,0x64,0xac, 0xba,0x5d,0x5d,0xe7, 0x32,0x19,0x19,0x2b, 0xe6,0x73,0x73,0x95, +0xc0,0x60,0x60,0xa0, 0x19,0x81,0x81,0x98, 0x9e,0x4f,0x4f,0xd1, 0xa3,0xdc,0xdc,0x7f, +0x44,0x22,0x22,0x66, 0x54,0x2a,0x2a,0x7e, 
0x3b,0x90,0x90,0xab, 0x0b,0x88,0x88,0x83, +0x8c,0x46,0x46,0xca, 0xc7,0xee,0xee,0x29, 0x6b,0xb8,0xb8,0xd3, 0x28,0x14,0x14,0x3c, +0xa7,0xde,0xde,0x79, 0xbc,0x5e,0x5e,0xe2, 0x16,0x0b,0x0b,0x1d, 0xad,0xdb,0xdb,0x76, +0xdb,0xe0,0xe0,0x3b, 0x64,0x32,0x32,0x56, 0x74,0x3a,0x3a,0x4e, 0x14,0x0a,0x0a,0x1e, +0x92,0x49,0x49,0xdb, 0x0c,0x06,0x06,0x0a, 0x48,0x24,0x24,0x6c, 0xb8,0x5c,0x5c,0xe4, +0x9f,0xc2,0xc2,0x5d, 0xbd,0xd3,0xd3,0x6e, 0x43,0xac,0xac,0xef, 0xc4,0x62,0x62,0xa6, +0x39,0x91,0x91,0xa8, 0x31,0x95,0x95,0xa4, 0xd3,0xe4,0xe4,0x37, 0xf2,0x79,0x79,0x8b, +0xd5,0xe7,0xe7,0x32, 0x8b,0xc8,0xc8,0x43, 0x6e,0x37,0x37,0x59, 0xda,0x6d,0x6d,0xb7, +0x01,0x8d,0x8d,0x8c, 0xb1,0xd5,0xd5,0x64, 0x9c,0x4e,0x4e,0xd2, 0x49,0xa9,0xa9,0xe0, +0xd8,0x6c,0x6c,0xb4, 0xac,0x56,0x56,0xfa, 0xf3,0xf4,0xf4,0x07, 0xcf,0xea,0xea,0x25, +0xca,0x65,0x65,0xaf, 0xf4,0x7a,0x7a,0x8e, 0x47,0xae,0xae,0xe9, 0x10,0x08,0x08,0x18, +0x6f,0xba,0xba,0xd5, 0xf0,0x78,0x78,0x88, 0x4a,0x25,0x25,0x6f, 0x5c,0x2e,0x2e,0x72, +0x38,0x1c,0x1c,0x24, 0x57,0xa6,0xa6,0xf1, 0x73,0xb4,0xb4,0xc7, 0x97,0xc6,0xc6,0x51, +0xcb,0xe8,0xe8,0x23, 0xa1,0xdd,0xdd,0x7c, 0xe8,0x74,0x74,0x9c, 0x3e,0x1f,0x1f,0x21, +0x96,0x4b,0x4b,0xdd, 0x61,0xbd,0xbd,0xdc, 0x0d,0x8b,0x8b,0x86, 0x0f,0x8a,0x8a,0x85, +0xe0,0x70,0x70,0x90, 0x7c,0x3e,0x3e,0x42, 0x71,0xb5,0xb5,0xc4, 0xcc,0x66,0x66,0xaa, +0x90,0x48,0x48,0xd8, 0x06,0x03,0x03,0x05, 0xf7,0xf6,0xf6,0x01, 0x1c,0x0e,0x0e,0x12, +0xc2,0x61,0x61,0xa3, 0x6a,0x35,0x35,0x5f, 0xae,0x57,0x57,0xf9, 0x69,0xb9,0xb9,0xd0, +0x17,0x86,0x86,0x91, 0x99,0xc1,0xc1,0x58, 0x3a,0x1d,0x1d,0x27, 0x27,0x9e,0x9e,0xb9, +0xd9,0xe1,0xe1,0x38, 0xeb,0xf8,0xf8,0x13, 0x2b,0x98,0x98,0xb3, 0x22,0x11,0x11,0x33, +0xd2,0x69,0x69,0xbb, 0xa9,0xd9,0xd9,0x70, 0x07,0x8e,0x8e,0x89, 0x33,0x94,0x94,0xa7, +0x2d,0x9b,0x9b,0xb6, 0x3c,0x1e,0x1e,0x22, 0x15,0x87,0x87,0x92, 0xc9,0xe9,0xe9,0x20, +0x87,0xce,0xce,0x49, 0xaa,0x55,0x55,0xff, 0x50,0x28,0x28,0x78, 0xa5,0xdf,0xdf,0x7a, +0x03,0x8c,0x8c,0x8f, 0x59,0xa1,0xa1,0xf8, 0x09,0x89,0x89,0x80, 0x1a,0x0d,0x0d,0x17, 
+0x65,0xbf,0xbf,0xda, 0xd7,0xe6,0xe6,0x31, 0x84,0x42,0x42,0xc6, 0xd0,0x68,0x68,0xb8, +0x82,0x41,0x41,0xc3, 0x29,0x99,0x99,0xb0, 0x5a,0x2d,0x2d,0x77, 0x1e,0x0f,0x0f,0x11, +0x7b,0xb0,0xb0,0xcb, 0xa8,0x54,0x54,0xfc, 0x6d,0xbb,0xbb,0xd6, 0x2c,0x16,0x16,0x3a +},{ +0xa5,0xc6,0x63,0x63, 0x84,0xf8,0x7c,0x7c, 0x99,0xee,0x77,0x77, 0x8d,0xf6,0x7b,0x7b, +0x0d,0xff,0xf2,0xf2, 0xbd,0xd6,0x6b,0x6b, 0xb1,0xde,0x6f,0x6f, 0x54,0x91,0xc5,0xc5, +0x50,0x60,0x30,0x30, 0x03,0x02,0x01,0x01, 0xa9,0xce,0x67,0x67, 0x7d,0x56,0x2b,0x2b, +0x19,0xe7,0xfe,0xfe, 0x62,0xb5,0xd7,0xd7, 0xe6,0x4d,0xab,0xab, 0x9a,0xec,0x76,0x76, +0x45,0x8f,0xca,0xca, 0x9d,0x1f,0x82,0x82, 0x40,0x89,0xc9,0xc9, 0x87,0xfa,0x7d,0x7d, +0x15,0xef,0xfa,0xfa, 0xeb,0xb2,0x59,0x59, 0xc9,0x8e,0x47,0x47, 0x0b,0xfb,0xf0,0xf0, +0xec,0x41,0xad,0xad, 0x67,0xb3,0xd4,0xd4, 0xfd,0x5f,0xa2,0xa2, 0xea,0x45,0xaf,0xaf, +0xbf,0x23,0x9c,0x9c, 0xf7,0x53,0xa4,0xa4, 0x96,0xe4,0x72,0x72, 0x5b,0x9b,0xc0,0xc0, +0xc2,0x75,0xb7,0xb7, 0x1c,0xe1,0xfd,0xfd, 0xae,0x3d,0x93,0x93, 0x6a,0x4c,0x26,0x26, +0x5a,0x6c,0x36,0x36, 0x41,0x7e,0x3f,0x3f, 0x02,0xf5,0xf7,0xf7, 0x4f,0x83,0xcc,0xcc, +0x5c,0x68,0x34,0x34, 0xf4,0x51,0xa5,0xa5, 0x34,0xd1,0xe5,0xe5, 0x08,0xf9,0xf1,0xf1, +0x93,0xe2,0x71,0x71, 0x73,0xab,0xd8,0xd8, 0x53,0x62,0x31,0x31, 0x3f,0x2a,0x15,0x15, +0x0c,0x08,0x04,0x04, 0x52,0x95,0xc7,0xc7, 0x65,0x46,0x23,0x23, 0x5e,0x9d,0xc3,0xc3, +0x28,0x30,0x18,0x18, 0xa1,0x37,0x96,0x96, 0x0f,0x0a,0x05,0x05, 0xb5,0x2f,0x9a,0x9a, +0x09,0x0e,0x07,0x07, 0x36,0x24,0x12,0x12, 0x9b,0x1b,0x80,0x80, 0x3d,0xdf,0xe2,0xe2, +0x26,0xcd,0xeb,0xeb, 0x69,0x4e,0x27,0x27, 0xcd,0x7f,0xb2,0xb2, 0x9f,0xea,0x75,0x75, +0x1b,0x12,0x09,0x09, 0x9e,0x1d,0x83,0x83, 0x74,0x58,0x2c,0x2c, 0x2e,0x34,0x1a,0x1a, +0x2d,0x36,0x1b,0x1b, 0xb2,0xdc,0x6e,0x6e, 0xee,0xb4,0x5a,0x5a, 0xfb,0x5b,0xa0,0xa0, +0xf6,0xa4,0x52,0x52, 0x4d,0x76,0x3b,0x3b, 0x61,0xb7,0xd6,0xd6, 0xce,0x7d,0xb3,0xb3, +0x7b,0x52,0x29,0x29, 0x3e,0xdd,0xe3,0xe3, 0x71,0x5e,0x2f,0x2f, 0x97,0x13,0x84,0x84, +0xf5,0xa6,0x53,0x53, 
0x68,0xb9,0xd1,0xd1, 0x00,0x00,0x00,0x00, 0x2c,0xc1,0xed,0xed, +0x60,0x40,0x20,0x20, 0x1f,0xe3,0xfc,0xfc, 0xc8,0x79,0xb1,0xb1, 0xed,0xb6,0x5b,0x5b, +0xbe,0xd4,0x6a,0x6a, 0x46,0x8d,0xcb,0xcb, 0xd9,0x67,0xbe,0xbe, 0x4b,0x72,0x39,0x39, +0xde,0x94,0x4a,0x4a, 0xd4,0x98,0x4c,0x4c, 0xe8,0xb0,0x58,0x58, 0x4a,0x85,0xcf,0xcf, +0x6b,0xbb,0xd0,0xd0, 0x2a,0xc5,0xef,0xef, 0xe5,0x4f,0xaa,0xaa, 0x16,0xed,0xfb,0xfb, +0xc5,0x86,0x43,0x43, 0xd7,0x9a,0x4d,0x4d, 0x55,0x66,0x33,0x33, 0x94,0x11,0x85,0x85, +0xcf,0x8a,0x45,0x45, 0x10,0xe9,0xf9,0xf9, 0x06,0x04,0x02,0x02, 0x81,0xfe,0x7f,0x7f, +0xf0,0xa0,0x50,0x50, 0x44,0x78,0x3c,0x3c, 0xba,0x25,0x9f,0x9f, 0xe3,0x4b,0xa8,0xa8, +0xf3,0xa2,0x51,0x51, 0xfe,0x5d,0xa3,0xa3, 0xc0,0x80,0x40,0x40, 0x8a,0x05,0x8f,0x8f, +0xad,0x3f,0x92,0x92, 0xbc,0x21,0x9d,0x9d, 0x48,0x70,0x38,0x38, 0x04,0xf1,0xf5,0xf5, +0xdf,0x63,0xbc,0xbc, 0xc1,0x77,0xb6,0xb6, 0x75,0xaf,0xda,0xda, 0x63,0x42,0x21,0x21, +0x30,0x20,0x10,0x10, 0x1a,0xe5,0xff,0xff, 0x0e,0xfd,0xf3,0xf3, 0x6d,0xbf,0xd2,0xd2, +0x4c,0x81,0xcd,0xcd, 0x14,0x18,0x0c,0x0c, 0x35,0x26,0x13,0x13, 0x2f,0xc3,0xec,0xec, +0xe1,0xbe,0x5f,0x5f, 0xa2,0x35,0x97,0x97, 0xcc,0x88,0x44,0x44, 0x39,0x2e,0x17,0x17, +0x57,0x93,0xc4,0xc4, 0xf2,0x55,0xa7,0xa7, 0x82,0xfc,0x7e,0x7e, 0x47,0x7a,0x3d,0x3d, +0xac,0xc8,0x64,0x64, 0xe7,0xba,0x5d,0x5d, 0x2b,0x32,0x19,0x19, 0x95,0xe6,0x73,0x73, +0xa0,0xc0,0x60,0x60, 0x98,0x19,0x81,0x81, 0xd1,0x9e,0x4f,0x4f, 0x7f,0xa3,0xdc,0xdc, +0x66,0x44,0x22,0x22, 0x7e,0x54,0x2a,0x2a, 0xab,0x3b,0x90,0x90, 0x83,0x0b,0x88,0x88, +0xca,0x8c,0x46,0x46, 0x29,0xc7,0xee,0xee, 0xd3,0x6b,0xb8,0xb8, 0x3c,0x28,0x14,0x14, +0x79,0xa7,0xde,0xde, 0xe2,0xbc,0x5e,0x5e, 0x1d,0x16,0x0b,0x0b, 0x76,0xad,0xdb,0xdb, +0x3b,0xdb,0xe0,0xe0, 0x56,0x64,0x32,0x32, 0x4e,0x74,0x3a,0x3a, 0x1e,0x14,0x0a,0x0a, +0xdb,0x92,0x49,0x49, 0x0a,0x0c,0x06,0x06, 0x6c,0x48,0x24,0x24, 0xe4,0xb8,0x5c,0x5c, +0x5d,0x9f,0xc2,0xc2, 0x6e,0xbd,0xd3,0xd3, 0xef,0x43,0xac,0xac, 0xa6,0xc4,0x62,0x62, +0xa8,0x39,0x91,0x91, 0xa4,0x31,0x95,0x95, 0x37,0xd3,0xe4,0xe4, 
0x8b,0xf2,0x79,0x79, +0x32,0xd5,0xe7,0xe7, 0x43,0x8b,0xc8,0xc8, 0x59,0x6e,0x37,0x37, 0xb7,0xda,0x6d,0x6d, +0x8c,0x01,0x8d,0x8d, 0x64,0xb1,0xd5,0xd5, 0xd2,0x9c,0x4e,0x4e, 0xe0,0x49,0xa9,0xa9, +0xb4,0xd8,0x6c,0x6c, 0xfa,0xac,0x56,0x56, 0x07,0xf3,0xf4,0xf4, 0x25,0xcf,0xea,0xea, +0xaf,0xca,0x65,0x65, 0x8e,0xf4,0x7a,0x7a, 0xe9,0x47,0xae,0xae, 0x18,0x10,0x08,0x08, +0xd5,0x6f,0xba,0xba, 0x88,0xf0,0x78,0x78, 0x6f,0x4a,0x25,0x25, 0x72,0x5c,0x2e,0x2e, +0x24,0x38,0x1c,0x1c, 0xf1,0x57,0xa6,0xa6, 0xc7,0x73,0xb4,0xb4, 0x51,0x97,0xc6,0xc6, +0x23,0xcb,0xe8,0xe8, 0x7c,0xa1,0xdd,0xdd, 0x9c,0xe8,0x74,0x74, 0x21,0x3e,0x1f,0x1f, +0xdd,0x96,0x4b,0x4b, 0xdc,0x61,0xbd,0xbd, 0x86,0x0d,0x8b,0x8b, 0x85,0x0f,0x8a,0x8a, +0x90,0xe0,0x70,0x70, 0x42,0x7c,0x3e,0x3e, 0xc4,0x71,0xb5,0xb5, 0xaa,0xcc,0x66,0x66, +0xd8,0x90,0x48,0x48, 0x05,0x06,0x03,0x03, 0x01,0xf7,0xf6,0xf6, 0x12,0x1c,0x0e,0x0e, +0xa3,0xc2,0x61,0x61, 0x5f,0x6a,0x35,0x35, 0xf9,0xae,0x57,0x57, 0xd0,0x69,0xb9,0xb9, +0x91,0x17,0x86,0x86, 0x58,0x99,0xc1,0xc1, 0x27,0x3a,0x1d,0x1d, 0xb9,0x27,0x9e,0x9e, +0x38,0xd9,0xe1,0xe1, 0x13,0xeb,0xf8,0xf8, 0xb3,0x2b,0x98,0x98, 0x33,0x22,0x11,0x11, +0xbb,0xd2,0x69,0x69, 0x70,0xa9,0xd9,0xd9, 0x89,0x07,0x8e,0x8e, 0xa7,0x33,0x94,0x94, +0xb6,0x2d,0x9b,0x9b, 0x22,0x3c,0x1e,0x1e, 0x92,0x15,0x87,0x87, 0x20,0xc9,0xe9,0xe9, +0x49,0x87,0xce,0xce, 0xff,0xaa,0x55,0x55, 0x78,0x50,0x28,0x28, 0x7a,0xa5,0xdf,0xdf, +0x8f,0x03,0x8c,0x8c, 0xf8,0x59,0xa1,0xa1, 0x80,0x09,0x89,0x89, 0x17,0x1a,0x0d,0x0d, +0xda,0x65,0xbf,0xbf, 0x31,0xd7,0xe6,0xe6, 0xc6,0x84,0x42,0x42, 0xb8,0xd0,0x68,0x68, +0xc3,0x82,0x41,0x41, 0xb0,0x29,0x99,0x99, 0x77,0x5a,0x2d,0x2d, 0x11,0x1e,0x0f,0x0f, +0xcb,0x7b,0xb0,0xb0, 0xfc,0xa8,0x54,0x54, 0xd6,0x6d,0xbb,0xbb, 0x3a,0x2c,0x16,0x16 +},{ +0x63,0xa5,0xc6,0x63, 0x7c,0x84,0xf8,0x7c, 0x77,0x99,0xee,0x77, 0x7b,0x8d,0xf6,0x7b, +0xf2,0x0d,0xff,0xf2, 0x6b,0xbd,0xd6,0x6b, 0x6f,0xb1,0xde,0x6f, 0xc5,0x54,0x91,0xc5, +0x30,0x50,0x60,0x30, 0x01,0x03,0x02,0x01, 0x67,0xa9,0xce,0x67, 0x2b,0x7d,0x56,0x2b, 
+0xfe,0x19,0xe7,0xfe, 0xd7,0x62,0xb5,0xd7, 0xab,0xe6,0x4d,0xab, 0x76,0x9a,0xec,0x76, +0xca,0x45,0x8f,0xca, 0x82,0x9d,0x1f,0x82, 0xc9,0x40,0x89,0xc9, 0x7d,0x87,0xfa,0x7d, +0xfa,0x15,0xef,0xfa, 0x59,0xeb,0xb2,0x59, 0x47,0xc9,0x8e,0x47, 0xf0,0x0b,0xfb,0xf0, +0xad,0xec,0x41,0xad, 0xd4,0x67,0xb3,0xd4, 0xa2,0xfd,0x5f,0xa2, 0xaf,0xea,0x45,0xaf, +0x9c,0xbf,0x23,0x9c, 0xa4,0xf7,0x53,0xa4, 0x72,0x96,0xe4,0x72, 0xc0,0x5b,0x9b,0xc0, +0xb7,0xc2,0x75,0xb7, 0xfd,0x1c,0xe1,0xfd, 0x93,0xae,0x3d,0x93, 0x26,0x6a,0x4c,0x26, +0x36,0x5a,0x6c,0x36, 0x3f,0x41,0x7e,0x3f, 0xf7,0x02,0xf5,0xf7, 0xcc,0x4f,0x83,0xcc, +0x34,0x5c,0x68,0x34, 0xa5,0xf4,0x51,0xa5, 0xe5,0x34,0xd1,0xe5, 0xf1,0x08,0xf9,0xf1, +0x71,0x93,0xe2,0x71, 0xd8,0x73,0xab,0xd8, 0x31,0x53,0x62,0x31, 0x15,0x3f,0x2a,0x15, +0x04,0x0c,0x08,0x04, 0xc7,0x52,0x95,0xc7, 0x23,0x65,0x46,0x23, 0xc3,0x5e,0x9d,0xc3, +0x18,0x28,0x30,0x18, 0x96,0xa1,0x37,0x96, 0x05,0x0f,0x0a,0x05, 0x9a,0xb5,0x2f,0x9a, +0x07,0x09,0x0e,0x07, 0x12,0x36,0x24,0x12, 0x80,0x9b,0x1b,0x80, 0xe2,0x3d,0xdf,0xe2, +0xeb,0x26,0xcd,0xeb, 0x27,0x69,0x4e,0x27, 0xb2,0xcd,0x7f,0xb2, 0x75,0x9f,0xea,0x75, +0x09,0x1b,0x12,0x09, 0x83,0x9e,0x1d,0x83, 0x2c,0x74,0x58,0x2c, 0x1a,0x2e,0x34,0x1a, +0x1b,0x2d,0x36,0x1b, 0x6e,0xb2,0xdc,0x6e, 0x5a,0xee,0xb4,0x5a, 0xa0,0xfb,0x5b,0xa0, +0x52,0xf6,0xa4,0x52, 0x3b,0x4d,0x76,0x3b, 0xd6,0x61,0xb7,0xd6, 0xb3,0xce,0x7d,0xb3, +0x29,0x7b,0x52,0x29, 0xe3,0x3e,0xdd,0xe3, 0x2f,0x71,0x5e,0x2f, 0x84,0x97,0x13,0x84, +0x53,0xf5,0xa6,0x53, 0xd1,0x68,0xb9,0xd1, 0x00,0x00,0x00,0x00, 0xed,0x2c,0xc1,0xed, +0x20,0x60,0x40,0x20, 0xfc,0x1f,0xe3,0xfc, 0xb1,0xc8,0x79,0xb1, 0x5b,0xed,0xb6,0x5b, +0x6a,0xbe,0xd4,0x6a, 0xcb,0x46,0x8d,0xcb, 0xbe,0xd9,0x67,0xbe, 0x39,0x4b,0x72,0x39, +0x4a,0xde,0x94,0x4a, 0x4c,0xd4,0x98,0x4c, 0x58,0xe8,0xb0,0x58, 0xcf,0x4a,0x85,0xcf, +0xd0,0x6b,0xbb,0xd0, 0xef,0x2a,0xc5,0xef, 0xaa,0xe5,0x4f,0xaa, 0xfb,0x16,0xed,0xfb, +0x43,0xc5,0x86,0x43, 0x4d,0xd7,0x9a,0x4d, 0x33,0x55,0x66,0x33, 0x85,0x94,0x11,0x85, +0x45,0xcf,0x8a,0x45, 0xf9,0x10,0xe9,0xf9, 
0x02,0x06,0x04,0x02, 0x7f,0x81,0xfe,0x7f, +0x50,0xf0,0xa0,0x50, 0x3c,0x44,0x78,0x3c, 0x9f,0xba,0x25,0x9f, 0xa8,0xe3,0x4b,0xa8, +0x51,0xf3,0xa2,0x51, 0xa3,0xfe,0x5d,0xa3, 0x40,0xc0,0x80,0x40, 0x8f,0x8a,0x05,0x8f, +0x92,0xad,0x3f,0x92, 0x9d,0xbc,0x21,0x9d, 0x38,0x48,0x70,0x38, 0xf5,0x04,0xf1,0xf5, +0xbc,0xdf,0x63,0xbc, 0xb6,0xc1,0x77,0xb6, 0xda,0x75,0xaf,0xda, 0x21,0x63,0x42,0x21, +0x10,0x30,0x20,0x10, 0xff,0x1a,0xe5,0xff, 0xf3,0x0e,0xfd,0xf3, 0xd2,0x6d,0xbf,0xd2, +0xcd,0x4c,0x81,0xcd, 0x0c,0x14,0x18,0x0c, 0x13,0x35,0x26,0x13, 0xec,0x2f,0xc3,0xec, +0x5f,0xe1,0xbe,0x5f, 0x97,0xa2,0x35,0x97, 0x44,0xcc,0x88,0x44, 0x17,0x39,0x2e,0x17, +0xc4,0x57,0x93,0xc4, 0xa7,0xf2,0x55,0xa7, 0x7e,0x82,0xfc,0x7e, 0x3d,0x47,0x7a,0x3d, +0x64,0xac,0xc8,0x64, 0x5d,0xe7,0xba,0x5d, 0x19,0x2b,0x32,0x19, 0x73,0x95,0xe6,0x73, +0x60,0xa0,0xc0,0x60, 0x81,0x98,0x19,0x81, 0x4f,0xd1,0x9e,0x4f, 0xdc,0x7f,0xa3,0xdc, +0x22,0x66,0x44,0x22, 0x2a,0x7e,0x54,0x2a, 0x90,0xab,0x3b,0x90, 0x88,0x83,0x0b,0x88, +0x46,0xca,0x8c,0x46, 0xee,0x29,0xc7,0xee, 0xb8,0xd3,0x6b,0xb8, 0x14,0x3c,0x28,0x14, +0xde,0x79,0xa7,0xde, 0x5e,0xe2,0xbc,0x5e, 0x0b,0x1d,0x16,0x0b, 0xdb,0x76,0xad,0xdb, +0xe0,0x3b,0xdb,0xe0, 0x32,0x56,0x64,0x32, 0x3a,0x4e,0x74,0x3a, 0x0a,0x1e,0x14,0x0a, +0x49,0xdb,0x92,0x49, 0x06,0x0a,0x0c,0x06, 0x24,0x6c,0x48,0x24, 0x5c,0xe4,0xb8,0x5c, +0xc2,0x5d,0x9f,0xc2, 0xd3,0x6e,0xbd,0xd3, 0xac,0xef,0x43,0xac, 0x62,0xa6,0xc4,0x62, +0x91,0xa8,0x39,0x91, 0x95,0xa4,0x31,0x95, 0xe4,0x37,0xd3,0xe4, 0x79,0x8b,0xf2,0x79, +0xe7,0x32,0xd5,0xe7, 0xc8,0x43,0x8b,0xc8, 0x37,0x59,0x6e,0x37, 0x6d,0xb7,0xda,0x6d, +0x8d,0x8c,0x01,0x8d, 0xd5,0x64,0xb1,0xd5, 0x4e,0xd2,0x9c,0x4e, 0xa9,0xe0,0x49,0xa9, +0x6c,0xb4,0xd8,0x6c, 0x56,0xfa,0xac,0x56, 0xf4,0x07,0xf3,0xf4, 0xea,0x25,0xcf,0xea, +0x65,0xaf,0xca,0x65, 0x7a,0x8e,0xf4,0x7a, 0xae,0xe9,0x47,0xae, 0x08,0x18,0x10,0x08, +0xba,0xd5,0x6f,0xba, 0x78,0x88,0xf0,0x78, 0x25,0x6f,0x4a,0x25, 0x2e,0x72,0x5c,0x2e, +0x1c,0x24,0x38,0x1c, 0xa6,0xf1,0x57,0xa6, 0xb4,0xc7,0x73,0xb4, 0xc6,0x51,0x97,0xc6, 
+0xe8,0x23,0xcb,0xe8, 0xdd,0x7c,0xa1,0xdd, 0x74,0x9c,0xe8,0x74, 0x1f,0x21,0x3e,0x1f, +0x4b,0xdd,0x96,0x4b, 0xbd,0xdc,0x61,0xbd, 0x8b,0x86,0x0d,0x8b, 0x8a,0x85,0x0f,0x8a, +0x70,0x90,0xe0,0x70, 0x3e,0x42,0x7c,0x3e, 0xb5,0xc4,0x71,0xb5, 0x66,0xaa,0xcc,0x66, +0x48,0xd8,0x90,0x48, 0x03,0x05,0x06,0x03, 0xf6,0x01,0xf7,0xf6, 0x0e,0x12,0x1c,0x0e, +0x61,0xa3,0xc2,0x61, 0x35,0x5f,0x6a,0x35, 0x57,0xf9,0xae,0x57, 0xb9,0xd0,0x69,0xb9, +0x86,0x91,0x17,0x86, 0xc1,0x58,0x99,0xc1, 0x1d,0x27,0x3a,0x1d, 0x9e,0xb9,0x27,0x9e, +0xe1,0x38,0xd9,0xe1, 0xf8,0x13,0xeb,0xf8, 0x98,0xb3,0x2b,0x98, 0x11,0x33,0x22,0x11, +0x69,0xbb,0xd2,0x69, 0xd9,0x70,0xa9,0xd9, 0x8e,0x89,0x07,0x8e, 0x94,0xa7,0x33,0x94, +0x9b,0xb6,0x2d,0x9b, 0x1e,0x22,0x3c,0x1e, 0x87,0x92,0x15,0x87, 0xe9,0x20,0xc9,0xe9, +0xce,0x49,0x87,0xce, 0x55,0xff,0xaa,0x55, 0x28,0x78,0x50,0x28, 0xdf,0x7a,0xa5,0xdf, +0x8c,0x8f,0x03,0x8c, 0xa1,0xf8,0x59,0xa1, 0x89,0x80,0x09,0x89, 0x0d,0x17,0x1a,0x0d, +0xbf,0xda,0x65,0xbf, 0xe6,0x31,0xd7,0xe6, 0x42,0xc6,0x84,0x42, 0x68,0xb8,0xd0,0x68, +0x41,0xc3,0x82,0x41, 0x99,0xb0,0x29,0x99, 0x2d,0x77,0x5a,0x2d, 0x0f,0x11,0x1e,0x0f, +0xb0,0xcb,0x7b,0xb0, 0x54,0xfc,0xa8,0x54, 0xbb,0xd6,0x6d,0xbb, 0x16,0x3a,0x2c,0x16 +},{ +0x63,0x63,0xa5,0xc6, 0x7c,0x7c,0x84,0xf8, 0x77,0x77,0x99,0xee, 0x7b,0x7b,0x8d,0xf6, +0xf2,0xf2,0x0d,0xff, 0x6b,0x6b,0xbd,0xd6, 0x6f,0x6f,0xb1,0xde, 0xc5,0xc5,0x54,0x91, +0x30,0x30,0x50,0x60, 0x01,0x01,0x03,0x02, 0x67,0x67,0xa9,0xce, 0x2b,0x2b,0x7d,0x56, +0xfe,0xfe,0x19,0xe7, 0xd7,0xd7,0x62,0xb5, 0xab,0xab,0xe6,0x4d, 0x76,0x76,0x9a,0xec, +0xca,0xca,0x45,0x8f, 0x82,0x82,0x9d,0x1f, 0xc9,0xc9,0x40,0x89, 0x7d,0x7d,0x87,0xfa, +0xfa,0xfa,0x15,0xef, 0x59,0x59,0xeb,0xb2, 0x47,0x47,0xc9,0x8e, 0xf0,0xf0,0x0b,0xfb, +0xad,0xad,0xec,0x41, 0xd4,0xd4,0x67,0xb3, 0xa2,0xa2,0xfd,0x5f, 0xaf,0xaf,0xea,0x45, +0x9c,0x9c,0xbf,0x23, 0xa4,0xa4,0xf7,0x53, 0x72,0x72,0x96,0xe4, 0xc0,0xc0,0x5b,0x9b, +0xb7,0xb7,0xc2,0x75, 0xfd,0xfd,0x1c,0xe1, 0x93,0x93,0xae,0x3d, 0x26,0x26,0x6a,0x4c, +0x36,0x36,0x5a,0x6c, 
0x3f,0x3f,0x41,0x7e, 0xf7,0xf7,0x02,0xf5, 0xcc,0xcc,0x4f,0x83, +0x34,0x34,0x5c,0x68, 0xa5,0xa5,0xf4,0x51, 0xe5,0xe5,0x34,0xd1, 0xf1,0xf1,0x08,0xf9, +0x71,0x71,0x93,0xe2, 0xd8,0xd8,0x73,0xab, 0x31,0x31,0x53,0x62, 0x15,0x15,0x3f,0x2a, +0x04,0x04,0x0c,0x08, 0xc7,0xc7,0x52,0x95, 0x23,0x23,0x65,0x46, 0xc3,0xc3,0x5e,0x9d, +0x18,0x18,0x28,0x30, 0x96,0x96,0xa1,0x37, 0x05,0x05,0x0f,0x0a, 0x9a,0x9a,0xb5,0x2f, +0x07,0x07,0x09,0x0e, 0x12,0x12,0x36,0x24, 0x80,0x80,0x9b,0x1b, 0xe2,0xe2,0x3d,0xdf, +0xeb,0xeb,0x26,0xcd, 0x27,0x27,0x69,0x4e, 0xb2,0xb2,0xcd,0x7f, 0x75,0x75,0x9f,0xea, +0x09,0x09,0x1b,0x12, 0x83,0x83,0x9e,0x1d, 0x2c,0x2c,0x74,0x58, 0x1a,0x1a,0x2e,0x34, +0x1b,0x1b,0x2d,0x36, 0x6e,0x6e,0xb2,0xdc, 0x5a,0x5a,0xee,0xb4, 0xa0,0xa0,0xfb,0x5b, +0x52,0x52,0xf6,0xa4, 0x3b,0x3b,0x4d,0x76, 0xd6,0xd6,0x61,0xb7, 0xb3,0xb3,0xce,0x7d, +0x29,0x29,0x7b,0x52, 0xe3,0xe3,0x3e,0xdd, 0x2f,0x2f,0x71,0x5e, 0x84,0x84,0x97,0x13, +0x53,0x53,0xf5,0xa6, 0xd1,0xd1,0x68,0xb9, 0x00,0x00,0x00,0x00, 0xed,0xed,0x2c,0xc1, +0x20,0x20,0x60,0x40, 0xfc,0xfc,0x1f,0xe3, 0xb1,0xb1,0xc8,0x79, 0x5b,0x5b,0xed,0xb6, +0x6a,0x6a,0xbe,0xd4, 0xcb,0xcb,0x46,0x8d, 0xbe,0xbe,0xd9,0x67, 0x39,0x39,0x4b,0x72, +0x4a,0x4a,0xde,0x94, 0x4c,0x4c,0xd4,0x98, 0x58,0x58,0xe8,0xb0, 0xcf,0xcf,0x4a,0x85, +0xd0,0xd0,0x6b,0xbb, 0xef,0xef,0x2a,0xc5, 0xaa,0xaa,0xe5,0x4f, 0xfb,0xfb,0x16,0xed, +0x43,0x43,0xc5,0x86, 0x4d,0x4d,0xd7,0x9a, 0x33,0x33,0x55,0x66, 0x85,0x85,0x94,0x11, +0x45,0x45,0xcf,0x8a, 0xf9,0xf9,0x10,0xe9, 0x02,0x02,0x06,0x04, 0x7f,0x7f,0x81,0xfe, +0x50,0x50,0xf0,0xa0, 0x3c,0x3c,0x44,0x78, 0x9f,0x9f,0xba,0x25, 0xa8,0xa8,0xe3,0x4b, +0x51,0x51,0xf3,0xa2, 0xa3,0xa3,0xfe,0x5d, 0x40,0x40,0xc0,0x80, 0x8f,0x8f,0x8a,0x05, +0x92,0x92,0xad,0x3f, 0x9d,0x9d,0xbc,0x21, 0x38,0x38,0x48,0x70, 0xf5,0xf5,0x04,0xf1, +0xbc,0xbc,0xdf,0x63, 0xb6,0xb6,0xc1,0x77, 0xda,0xda,0x75,0xaf, 0x21,0x21,0x63,0x42, +0x10,0x10,0x30,0x20, 0xff,0xff,0x1a,0xe5, 0xf3,0xf3,0x0e,0xfd, 0xd2,0xd2,0x6d,0xbf, +0xcd,0xcd,0x4c,0x81, 0x0c,0x0c,0x14,0x18, 0x13,0x13,0x35,0x26, 
0xec,0xec,0x2f,0xc3, +0x5f,0x5f,0xe1,0xbe, 0x97,0x97,0xa2,0x35, 0x44,0x44,0xcc,0x88, 0x17,0x17,0x39,0x2e, +0xc4,0xc4,0x57,0x93, 0xa7,0xa7,0xf2,0x55, 0x7e,0x7e,0x82,0xfc, 0x3d,0x3d,0x47,0x7a, +0x64,0x64,0xac,0xc8, 0x5d,0x5d,0xe7,0xba, 0x19,0x19,0x2b,0x32, 0x73,0x73,0x95,0xe6, +0x60,0x60,0xa0,0xc0, 0x81,0x81,0x98,0x19, 0x4f,0x4f,0xd1,0x9e, 0xdc,0xdc,0x7f,0xa3, +0x22,0x22,0x66,0x44, 0x2a,0x2a,0x7e,0x54, 0x90,0x90,0xab,0x3b, 0x88,0x88,0x83,0x0b, +0x46,0x46,0xca,0x8c, 0xee,0xee,0x29,0xc7, 0xb8,0xb8,0xd3,0x6b, 0x14,0x14,0x3c,0x28, +0xde,0xde,0x79,0xa7, 0x5e,0x5e,0xe2,0xbc, 0x0b,0x0b,0x1d,0x16, 0xdb,0xdb,0x76,0xad, +0xe0,0xe0,0x3b,0xdb, 0x32,0x32,0x56,0x64, 0x3a,0x3a,0x4e,0x74, 0x0a,0x0a,0x1e,0x14, +0x49,0x49,0xdb,0x92, 0x06,0x06,0x0a,0x0c, 0x24,0x24,0x6c,0x48, 0x5c,0x5c,0xe4,0xb8, +0xc2,0xc2,0x5d,0x9f, 0xd3,0xd3,0x6e,0xbd, 0xac,0xac,0xef,0x43, 0x62,0x62,0xa6,0xc4, +0x91,0x91,0xa8,0x39, 0x95,0x95,0xa4,0x31, 0xe4,0xe4,0x37,0xd3, 0x79,0x79,0x8b,0xf2, +0xe7,0xe7,0x32,0xd5, 0xc8,0xc8,0x43,0x8b, 0x37,0x37,0x59,0x6e, 0x6d,0x6d,0xb7,0xda, +0x8d,0x8d,0x8c,0x01, 0xd5,0xd5,0x64,0xb1, 0x4e,0x4e,0xd2,0x9c, 0xa9,0xa9,0xe0,0x49, +0x6c,0x6c,0xb4,0xd8, 0x56,0x56,0xfa,0xac, 0xf4,0xf4,0x07,0xf3, 0xea,0xea,0x25,0xcf, +0x65,0x65,0xaf,0xca, 0x7a,0x7a,0x8e,0xf4, 0xae,0xae,0xe9,0x47, 0x08,0x08,0x18,0x10, +0xba,0xba,0xd5,0x6f, 0x78,0x78,0x88,0xf0, 0x25,0x25,0x6f,0x4a, 0x2e,0x2e,0x72,0x5c, +0x1c,0x1c,0x24,0x38, 0xa6,0xa6,0xf1,0x57, 0xb4,0xb4,0xc7,0x73, 0xc6,0xc6,0x51,0x97, +0xe8,0xe8,0x23,0xcb, 0xdd,0xdd,0x7c,0xa1, 0x74,0x74,0x9c,0xe8, 0x1f,0x1f,0x21,0x3e, +0x4b,0x4b,0xdd,0x96, 0xbd,0xbd,0xdc,0x61, 0x8b,0x8b,0x86,0x0d, 0x8a,0x8a,0x85,0x0f, +0x70,0x70,0x90,0xe0, 0x3e,0x3e,0x42,0x7c, 0xb5,0xb5,0xc4,0x71, 0x66,0x66,0xaa,0xcc, +0x48,0x48,0xd8,0x90, 0x03,0x03,0x05,0x06, 0xf6,0xf6,0x01,0xf7, 0x0e,0x0e,0x12,0x1c, +0x61,0x61,0xa3,0xc2, 0x35,0x35,0x5f,0x6a, 0x57,0x57,0xf9,0xae, 0xb9,0xb9,0xd0,0x69, +0x86,0x86,0x91,0x17, 0xc1,0xc1,0x58,0x99, 0x1d,0x1d,0x27,0x3a, 0x9e,0x9e,0xb9,0x27, +0xe1,0xe1,0x38,0xd9, 
0xf8,0xf8,0x13,0xeb, 0x98,0x98,0xb3,0x2b, 0x11,0x11,0x33,0x22, +0x69,0x69,0xbb,0xd2, 0xd9,0xd9,0x70,0xa9, 0x8e,0x8e,0x89,0x07, 0x94,0x94,0xa7,0x33, +0x9b,0x9b,0xb6,0x2d, 0x1e,0x1e,0x22,0x3c, 0x87,0x87,0x92,0x15, 0xe9,0xe9,0x20,0xc9, +0xce,0xce,0x49,0x87, 0x55,0x55,0xff,0xaa, 0x28,0x28,0x78,0x50, 0xdf,0xdf,0x7a,0xa5, +0x8c,0x8c,0x8f,0x03, 0xa1,0xa1,0xf8,0x59, 0x89,0x89,0x80,0x09, 0x0d,0x0d,0x17,0x1a, +0xbf,0xbf,0xda,0x65, 0xe6,0xe6,0x31,0xd7, 0x42,0x42,0xc6,0x84, 0x68,0x68,0xb8,0xd0, +0x41,0x41,0xc3,0x82, 0x99,0x99,0xb0,0x29, 0x2d,0x2d,0x77,0x5a, 0x0f,0x0f,0x11,0x1e, +0xb0,0xb0,0xcb,0x7b, 0x54,0x54,0xfc,0xa8, 0xbb,0xbb,0xd6,0x6d, 0x16,0x16,0x3a,0x2c +}}; + +SYMCRYPT_ALIGN_AT(1024) const BYTE SymCryptAesInvSboxMatrixMult[4][256][4] = {{// Main decryption tables +0x51,0xf4,0xa7,0x50, 0x7e,0x41,0x65,0x53, 0x1a,0x17,0xa4,0xc3, 0x3a,0x27,0x5e,0x96, +0x3b,0xab,0x6b,0xcb, 0x1f,0x9d,0x45,0xf1, 0xac,0xfa,0x58,0xab, 0x4b,0xe3,0x03,0x93, +0x20,0x30,0xfa,0x55, 0xad,0x76,0x6d,0xf6, 0x88,0xcc,0x76,0x91, 0xf5,0x02,0x4c,0x25, +0x4f,0xe5,0xd7,0xfc, 0xc5,0x2a,0xcb,0xd7, 0x26,0x35,0x44,0x80, 0xb5,0x62,0xa3,0x8f, +0xde,0xb1,0x5a,0x49, 0x25,0xba,0x1b,0x67, 0x45,0xea,0x0e,0x98, 0x5d,0xfe,0xc0,0xe1, +0xc3,0x2f,0x75,0x02, 0x81,0x4c,0xf0,0x12, 0x8d,0x46,0x97,0xa3, 0x6b,0xd3,0xf9,0xc6, +0x03,0x8f,0x5f,0xe7, 0x15,0x92,0x9c,0x95, 0xbf,0x6d,0x7a,0xeb, 0x95,0x52,0x59,0xda, +0xd4,0xbe,0x83,0x2d, 0x58,0x74,0x21,0xd3, 0x49,0xe0,0x69,0x29, 0x8e,0xc9,0xc8,0x44, +0x75,0xc2,0x89,0x6a, 0xf4,0x8e,0x79,0x78, 0x99,0x58,0x3e,0x6b, 0x27,0xb9,0x71,0xdd, +0xbe,0xe1,0x4f,0xb6, 0xf0,0x88,0xad,0x17, 0xc9,0x20,0xac,0x66, 0x7d,0xce,0x3a,0xb4, +0x63,0xdf,0x4a,0x18, 0xe5,0x1a,0x31,0x82, 0x97,0x51,0x33,0x60, 0x62,0x53,0x7f,0x45, +0xb1,0x64,0x77,0xe0, 0xbb,0x6b,0xae,0x84, 0xfe,0x81,0xa0,0x1c, 0xf9,0x08,0x2b,0x94, +0x70,0x48,0x68,0x58, 0x8f,0x45,0xfd,0x19, 0x94,0xde,0x6c,0x87, 0x52,0x7b,0xf8,0xb7, +0xab,0x73,0xd3,0x23, 0x72,0x4b,0x02,0xe2, 0xe3,0x1f,0x8f,0x57, 0x66,0x55,0xab,0x2a, +0xb2,0xeb,0x28,0x07, 
0x2f,0xb5,0xc2,0x03, 0x86,0xc5,0x7b,0x9a, 0xd3,0x37,0x08,0xa5, +0x30,0x28,0x87,0xf2, 0x23,0xbf,0xa5,0xb2, 0x02,0x03,0x6a,0xba, 0xed,0x16,0x82,0x5c, +0x8a,0xcf,0x1c,0x2b, 0xa7,0x79,0xb4,0x92, 0xf3,0x07,0xf2,0xf0, 0x4e,0x69,0xe2,0xa1, +0x65,0xda,0xf4,0xcd, 0x06,0x05,0xbe,0xd5, 0xd1,0x34,0x62,0x1f, 0xc4,0xa6,0xfe,0x8a, +0x34,0x2e,0x53,0x9d, 0xa2,0xf3,0x55,0xa0, 0x05,0x8a,0xe1,0x32, 0xa4,0xf6,0xeb,0x75, +0x0b,0x83,0xec,0x39, 0x40,0x60,0xef,0xaa, 0x5e,0x71,0x9f,0x06, 0xbd,0x6e,0x10,0x51, +0x3e,0x21,0x8a,0xf9, 0x96,0xdd,0x06,0x3d, 0xdd,0x3e,0x05,0xae, 0x4d,0xe6,0xbd,0x46, +0x91,0x54,0x8d,0xb5, 0x71,0xc4,0x5d,0x05, 0x04,0x06,0xd4,0x6f, 0x60,0x50,0x15,0xff, +0x19,0x98,0xfb,0x24, 0xd6,0xbd,0xe9,0x97, 0x89,0x40,0x43,0xcc, 0x67,0xd9,0x9e,0x77, +0xb0,0xe8,0x42,0xbd, 0x07,0x89,0x8b,0x88, 0xe7,0x19,0x5b,0x38, 0x79,0xc8,0xee,0xdb, +0xa1,0x7c,0x0a,0x47, 0x7c,0x42,0x0f,0xe9, 0xf8,0x84,0x1e,0xc9, 0x00,0x00,0x00,0x00, +0x09,0x80,0x86,0x83, 0x32,0x2b,0xed,0x48, 0x1e,0x11,0x70,0xac, 0x6c,0x5a,0x72,0x4e, +0xfd,0x0e,0xff,0xfb, 0x0f,0x85,0x38,0x56, 0x3d,0xae,0xd5,0x1e, 0x36,0x2d,0x39,0x27, +0x0a,0x0f,0xd9,0x64, 0x68,0x5c,0xa6,0x21, 0x9b,0x5b,0x54,0xd1, 0x24,0x36,0x2e,0x3a, +0x0c,0x0a,0x67,0xb1, 0x93,0x57,0xe7,0x0f, 0xb4,0xee,0x96,0xd2, 0x1b,0x9b,0x91,0x9e, +0x80,0xc0,0xc5,0x4f, 0x61,0xdc,0x20,0xa2, 0x5a,0x77,0x4b,0x69, 0x1c,0x12,0x1a,0x16, +0xe2,0x93,0xba,0x0a, 0xc0,0xa0,0x2a,0xe5, 0x3c,0x22,0xe0,0x43, 0x12,0x1b,0x17,0x1d, +0x0e,0x09,0x0d,0x0b, 0xf2,0x8b,0xc7,0xad, 0x2d,0xb6,0xa8,0xb9, 0x14,0x1e,0xa9,0xc8, +0x57,0xf1,0x19,0x85, 0xaf,0x75,0x07,0x4c, 0xee,0x99,0xdd,0xbb, 0xa3,0x7f,0x60,0xfd, +0xf7,0x01,0x26,0x9f, 0x5c,0x72,0xf5,0xbc, 0x44,0x66,0x3b,0xc5, 0x5b,0xfb,0x7e,0x34, +0x8b,0x43,0x29,0x76, 0xcb,0x23,0xc6,0xdc, 0xb6,0xed,0xfc,0x68, 0xb8,0xe4,0xf1,0x63, +0xd7,0x31,0xdc,0xca, 0x42,0x63,0x85,0x10, 0x13,0x97,0x22,0x40, 0x84,0xc6,0x11,0x20, +0x85,0x4a,0x24,0x7d, 0xd2,0xbb,0x3d,0xf8, 0xae,0xf9,0x32,0x11, 0xc7,0x29,0xa1,0x6d, +0x1d,0x9e,0x2f,0x4b, 0xdc,0xb2,0x30,0xf3, 0x0d,0x86,0x52,0xec, 
0x77,0xc1,0xe3,0xd0, +0x2b,0xb3,0x16,0x6c, 0xa9,0x70,0xb9,0x99, 0x11,0x94,0x48,0xfa, 0x47,0xe9,0x64,0x22, +0xa8,0xfc,0x8c,0xc4, 0xa0,0xf0,0x3f,0x1a, 0x56,0x7d,0x2c,0xd8, 0x22,0x33,0x90,0xef, +0x87,0x49,0x4e,0xc7, 0xd9,0x38,0xd1,0xc1, 0x8c,0xca,0xa2,0xfe, 0x98,0xd4,0x0b,0x36, +0xa6,0xf5,0x81,0xcf, 0xa5,0x7a,0xde,0x28, 0xda,0xb7,0x8e,0x26, 0x3f,0xad,0xbf,0xa4, +0x2c,0x3a,0x9d,0xe4, 0x50,0x78,0x92,0x0d, 0x6a,0x5f,0xcc,0x9b, 0x54,0x7e,0x46,0x62, +0xf6,0x8d,0x13,0xc2, 0x90,0xd8,0xb8,0xe8, 0x2e,0x39,0xf7,0x5e, 0x82,0xc3,0xaf,0xf5, +0x9f,0x5d,0x80,0xbe, 0x69,0xd0,0x93,0x7c, 0x6f,0xd5,0x2d,0xa9, 0xcf,0x25,0x12,0xb3, +0xc8,0xac,0x99,0x3b, 0x10,0x18,0x7d,0xa7, 0xe8,0x9c,0x63,0x6e, 0xdb,0x3b,0xbb,0x7b, +0xcd,0x26,0x78,0x09, 0x6e,0x59,0x18,0xf4, 0xec,0x9a,0xb7,0x01, 0x83,0x4f,0x9a,0xa8, +0xe6,0x95,0x6e,0x65, 0xaa,0xff,0xe6,0x7e, 0x21,0xbc,0xcf,0x08, 0xef,0x15,0xe8,0xe6, +0xba,0xe7,0x9b,0xd9, 0x4a,0x6f,0x36,0xce, 0xea,0x9f,0x09,0xd4, 0x29,0xb0,0x7c,0xd6, +0x31,0xa4,0xb2,0xaf, 0x2a,0x3f,0x23,0x31, 0xc6,0xa5,0x94,0x30, 0x35,0xa2,0x66,0xc0, +0x74,0x4e,0xbc,0x37, 0xfc,0x82,0xca,0xa6, 0xe0,0x90,0xd0,0xb0, 0x33,0xa7,0xd8,0x15, +0xf1,0x04,0x98,0x4a, 0x41,0xec,0xda,0xf7, 0x7f,0xcd,0x50,0x0e, 0x17,0x91,0xf6,0x2f, +0x76,0x4d,0xd6,0x8d, 0x43,0xef,0xb0,0x4d, 0xcc,0xaa,0x4d,0x54, 0xe4,0x96,0x04,0xdf, +0x9e,0xd1,0xb5,0xe3, 0x4c,0x6a,0x88,0x1b, 0xc1,0x2c,0x1f,0xb8, 0x46,0x65,0x51,0x7f, +0x9d,0x5e,0xea,0x04, 0x01,0x8c,0x35,0x5d, 0xfa,0x87,0x74,0x73, 0xfb,0x0b,0x41,0x2e, +0xb3,0x67,0x1d,0x5a, 0x92,0xdb,0xd2,0x52, 0xe9,0x10,0x56,0x33, 0x6d,0xd6,0x47,0x13, +0x9a,0xd7,0x61,0x8c, 0x37,0xa1,0x0c,0x7a, 0x59,0xf8,0x14,0x8e, 0xeb,0x13,0x3c,0x89, +0xce,0xa9,0x27,0xee, 0xb7,0x61,0xc9,0x35, 0xe1,0x1c,0xe5,0xed, 0x7a,0x47,0xb1,0x3c, +0x9c,0xd2,0xdf,0x59, 0x55,0xf2,0x73,0x3f, 0x18,0x14,0xce,0x79, 0x73,0xc7,0x37,0xbf, +0x53,0xf7,0xcd,0xea, 0x5f,0xfd,0xaa,0x5b, 0xdf,0x3d,0x6f,0x14, 0x78,0x44,0xdb,0x86, +0xca,0xaf,0xf3,0x81, 0xb9,0x68,0xc4,0x3e, 0x38,0x24,0x34,0x2c, 0xc2,0xa3,0x40,0x5f, +0x16,0x1d,0xc3,0x72, 
0xbc,0xe2,0x25,0x0c, 0x28,0x3c,0x49,0x8b, 0xff,0x0d,0x95,0x41, +0x39,0xa8,0x01,0x71, 0x08,0x0c,0xb3,0xde, 0xd8,0xb4,0xe4,0x9c, 0x64,0x56,0xc1,0x90, +0x7b,0xcb,0x84,0x61, 0xd5,0x32,0xb6,0x70, 0x48,0x6c,0x5c,0x74, 0xd0,0xb8,0x57,0x42 +},{ +0x50,0x51,0xf4,0xa7, 0x53,0x7e,0x41,0x65, 0xc3,0x1a,0x17,0xa4, 0x96,0x3a,0x27,0x5e, +0xcb,0x3b,0xab,0x6b, 0xf1,0x1f,0x9d,0x45, 0xab,0xac,0xfa,0x58, 0x93,0x4b,0xe3,0x03, +0x55,0x20,0x30,0xfa, 0xf6,0xad,0x76,0x6d, 0x91,0x88,0xcc,0x76, 0x25,0xf5,0x02,0x4c, +0xfc,0x4f,0xe5,0xd7, 0xd7,0xc5,0x2a,0xcb, 0x80,0x26,0x35,0x44, 0x8f,0xb5,0x62,0xa3, +0x49,0xde,0xb1,0x5a, 0x67,0x25,0xba,0x1b, 0x98,0x45,0xea,0x0e, 0xe1,0x5d,0xfe,0xc0, +0x02,0xc3,0x2f,0x75, 0x12,0x81,0x4c,0xf0, 0xa3,0x8d,0x46,0x97, 0xc6,0x6b,0xd3,0xf9, +0xe7,0x03,0x8f,0x5f, 0x95,0x15,0x92,0x9c, 0xeb,0xbf,0x6d,0x7a, 0xda,0x95,0x52,0x59, +0x2d,0xd4,0xbe,0x83, 0xd3,0x58,0x74,0x21, 0x29,0x49,0xe0,0x69, 0x44,0x8e,0xc9,0xc8, +0x6a,0x75,0xc2,0x89, 0x78,0xf4,0x8e,0x79, 0x6b,0x99,0x58,0x3e, 0xdd,0x27,0xb9,0x71, +0xb6,0xbe,0xe1,0x4f, 0x17,0xf0,0x88,0xad, 0x66,0xc9,0x20,0xac, 0xb4,0x7d,0xce,0x3a, +0x18,0x63,0xdf,0x4a, 0x82,0xe5,0x1a,0x31, 0x60,0x97,0x51,0x33, 0x45,0x62,0x53,0x7f, +0xe0,0xb1,0x64,0x77, 0x84,0xbb,0x6b,0xae, 0x1c,0xfe,0x81,0xa0, 0x94,0xf9,0x08,0x2b, +0x58,0x70,0x48,0x68, 0x19,0x8f,0x45,0xfd, 0x87,0x94,0xde,0x6c, 0xb7,0x52,0x7b,0xf8, +0x23,0xab,0x73,0xd3, 0xe2,0x72,0x4b,0x02, 0x57,0xe3,0x1f,0x8f, 0x2a,0x66,0x55,0xab, +0x07,0xb2,0xeb,0x28, 0x03,0x2f,0xb5,0xc2, 0x9a,0x86,0xc5,0x7b, 0xa5,0xd3,0x37,0x08, +0xf2,0x30,0x28,0x87, 0xb2,0x23,0xbf,0xa5, 0xba,0x02,0x03,0x6a, 0x5c,0xed,0x16,0x82, +0x2b,0x8a,0xcf,0x1c, 0x92,0xa7,0x79,0xb4, 0xf0,0xf3,0x07,0xf2, 0xa1,0x4e,0x69,0xe2, +0xcd,0x65,0xda,0xf4, 0xd5,0x06,0x05,0xbe, 0x1f,0xd1,0x34,0x62, 0x8a,0xc4,0xa6,0xfe, +0x9d,0x34,0x2e,0x53, 0xa0,0xa2,0xf3,0x55, 0x32,0x05,0x8a,0xe1, 0x75,0xa4,0xf6,0xeb, +0x39,0x0b,0x83,0xec, 0xaa,0x40,0x60,0xef, 0x06,0x5e,0x71,0x9f, 0x51,0xbd,0x6e,0x10, +0xf9,0x3e,0x21,0x8a, 0x3d,0x96,0xdd,0x06, 
0xae,0xdd,0x3e,0x05, 0x46,0x4d,0xe6,0xbd, +0xb5,0x91,0x54,0x8d, 0x05,0x71,0xc4,0x5d, 0x6f,0x04,0x06,0xd4, 0xff,0x60,0x50,0x15, +0x24,0x19,0x98,0xfb, 0x97,0xd6,0xbd,0xe9, 0xcc,0x89,0x40,0x43, 0x77,0x67,0xd9,0x9e, +0xbd,0xb0,0xe8,0x42, 0x88,0x07,0x89,0x8b, 0x38,0xe7,0x19,0x5b, 0xdb,0x79,0xc8,0xee, +0x47,0xa1,0x7c,0x0a, 0xe9,0x7c,0x42,0x0f, 0xc9,0xf8,0x84,0x1e, 0x00,0x00,0x00,0x00, +0x83,0x09,0x80,0x86, 0x48,0x32,0x2b,0xed, 0xac,0x1e,0x11,0x70, 0x4e,0x6c,0x5a,0x72, +0xfb,0xfd,0x0e,0xff, 0x56,0x0f,0x85,0x38, 0x1e,0x3d,0xae,0xd5, 0x27,0x36,0x2d,0x39, +0x64,0x0a,0x0f,0xd9, 0x21,0x68,0x5c,0xa6, 0xd1,0x9b,0x5b,0x54, 0x3a,0x24,0x36,0x2e, +0xb1,0x0c,0x0a,0x67, 0x0f,0x93,0x57,0xe7, 0xd2,0xb4,0xee,0x96, 0x9e,0x1b,0x9b,0x91, +0x4f,0x80,0xc0,0xc5, 0xa2,0x61,0xdc,0x20, 0x69,0x5a,0x77,0x4b, 0x16,0x1c,0x12,0x1a, +0x0a,0xe2,0x93,0xba, 0xe5,0xc0,0xa0,0x2a, 0x43,0x3c,0x22,0xe0, 0x1d,0x12,0x1b,0x17, +0x0b,0x0e,0x09,0x0d, 0xad,0xf2,0x8b,0xc7, 0xb9,0x2d,0xb6,0xa8, 0xc8,0x14,0x1e,0xa9, +0x85,0x57,0xf1,0x19, 0x4c,0xaf,0x75,0x07, 0xbb,0xee,0x99,0xdd, 0xfd,0xa3,0x7f,0x60, +0x9f,0xf7,0x01,0x26, 0xbc,0x5c,0x72,0xf5, 0xc5,0x44,0x66,0x3b, 0x34,0x5b,0xfb,0x7e, +0x76,0x8b,0x43,0x29, 0xdc,0xcb,0x23,0xc6, 0x68,0xb6,0xed,0xfc, 0x63,0xb8,0xe4,0xf1, +0xca,0xd7,0x31,0xdc, 0x10,0x42,0x63,0x85, 0x40,0x13,0x97,0x22, 0x20,0x84,0xc6,0x11, +0x7d,0x85,0x4a,0x24, 0xf8,0xd2,0xbb,0x3d, 0x11,0xae,0xf9,0x32, 0x6d,0xc7,0x29,0xa1, +0x4b,0x1d,0x9e,0x2f, 0xf3,0xdc,0xb2,0x30, 0xec,0x0d,0x86,0x52, 0xd0,0x77,0xc1,0xe3, +0x6c,0x2b,0xb3,0x16, 0x99,0xa9,0x70,0xb9, 0xfa,0x11,0x94,0x48, 0x22,0x47,0xe9,0x64, +0xc4,0xa8,0xfc,0x8c, 0x1a,0xa0,0xf0,0x3f, 0xd8,0x56,0x7d,0x2c, 0xef,0x22,0x33,0x90, +0xc7,0x87,0x49,0x4e, 0xc1,0xd9,0x38,0xd1, 0xfe,0x8c,0xca,0xa2, 0x36,0x98,0xd4,0x0b, +0xcf,0xa6,0xf5,0x81, 0x28,0xa5,0x7a,0xde, 0x26,0xda,0xb7,0x8e, 0xa4,0x3f,0xad,0xbf, +0xe4,0x2c,0x3a,0x9d, 0x0d,0x50,0x78,0x92, 0x9b,0x6a,0x5f,0xcc, 0x62,0x54,0x7e,0x46, +0xc2,0xf6,0x8d,0x13, 0xe8,0x90,0xd8,0xb8, 0x5e,0x2e,0x39,0xf7, 0xf5,0x82,0xc3,0xaf, 
+0xbe,0x9f,0x5d,0x80, 0x7c,0x69,0xd0,0x93, 0xa9,0x6f,0xd5,0x2d, 0xb3,0xcf,0x25,0x12, +0x3b,0xc8,0xac,0x99, 0xa7,0x10,0x18,0x7d, 0x6e,0xe8,0x9c,0x63, 0x7b,0xdb,0x3b,0xbb, +0x09,0xcd,0x26,0x78, 0xf4,0x6e,0x59,0x18, 0x01,0xec,0x9a,0xb7, 0xa8,0x83,0x4f,0x9a, +0x65,0xe6,0x95,0x6e, 0x7e,0xaa,0xff,0xe6, 0x08,0x21,0xbc,0xcf, 0xe6,0xef,0x15,0xe8, +0xd9,0xba,0xe7,0x9b, 0xce,0x4a,0x6f,0x36, 0xd4,0xea,0x9f,0x09, 0xd6,0x29,0xb0,0x7c, +0xaf,0x31,0xa4,0xb2, 0x31,0x2a,0x3f,0x23, 0x30,0xc6,0xa5,0x94, 0xc0,0x35,0xa2,0x66, +0x37,0x74,0x4e,0xbc, 0xa6,0xfc,0x82,0xca, 0xb0,0xe0,0x90,0xd0, 0x15,0x33,0xa7,0xd8, +0x4a,0xf1,0x04,0x98, 0xf7,0x41,0xec,0xda, 0x0e,0x7f,0xcd,0x50, 0x2f,0x17,0x91,0xf6, +0x8d,0x76,0x4d,0xd6, 0x4d,0x43,0xef,0xb0, 0x54,0xcc,0xaa,0x4d, 0xdf,0xe4,0x96,0x04, +0xe3,0x9e,0xd1,0xb5, 0x1b,0x4c,0x6a,0x88, 0xb8,0xc1,0x2c,0x1f, 0x7f,0x46,0x65,0x51, +0x04,0x9d,0x5e,0xea, 0x5d,0x01,0x8c,0x35, 0x73,0xfa,0x87,0x74, 0x2e,0xfb,0x0b,0x41, +0x5a,0xb3,0x67,0x1d, 0x52,0x92,0xdb,0xd2, 0x33,0xe9,0x10,0x56, 0x13,0x6d,0xd6,0x47, +0x8c,0x9a,0xd7,0x61, 0x7a,0x37,0xa1,0x0c, 0x8e,0x59,0xf8,0x14, 0x89,0xeb,0x13,0x3c, +0xee,0xce,0xa9,0x27, 0x35,0xb7,0x61,0xc9, 0xed,0xe1,0x1c,0xe5, 0x3c,0x7a,0x47,0xb1, +0x59,0x9c,0xd2,0xdf, 0x3f,0x55,0xf2,0x73, 0x79,0x18,0x14,0xce, 0xbf,0x73,0xc7,0x37, +0xea,0x53,0xf7,0xcd, 0x5b,0x5f,0xfd,0xaa, 0x14,0xdf,0x3d,0x6f, 0x86,0x78,0x44,0xdb, +0x81,0xca,0xaf,0xf3, 0x3e,0xb9,0x68,0xc4, 0x2c,0x38,0x24,0x34, 0x5f,0xc2,0xa3,0x40, +0x72,0x16,0x1d,0xc3, 0x0c,0xbc,0xe2,0x25, 0x8b,0x28,0x3c,0x49, 0x41,0xff,0x0d,0x95, +0x71,0x39,0xa8,0x01, 0xde,0x08,0x0c,0xb3, 0x9c,0xd8,0xb4,0xe4, 0x90,0x64,0x56,0xc1, +0x61,0x7b,0xcb,0x84, 0x70,0xd5,0x32,0xb6, 0x74,0x48,0x6c,0x5c, 0x42,0xd0,0xb8,0x57 +},{ +0xa7,0x50,0x51,0xf4, 0x65,0x53,0x7e,0x41, 0xa4,0xc3,0x1a,0x17, 0x5e,0x96,0x3a,0x27, +0x6b,0xcb,0x3b,0xab, 0x45,0xf1,0x1f,0x9d, 0x58,0xab,0xac,0xfa, 0x03,0x93,0x4b,0xe3, +0xfa,0x55,0x20,0x30, 0x6d,0xf6,0xad,0x76, 0x76,0x91,0x88,0xcc, 0x4c,0x25,0xf5,0x02, +0xd7,0xfc,0x4f,0xe5, 
0xcb,0xd7,0xc5,0x2a, 0x44,0x80,0x26,0x35, 0xa3,0x8f,0xb5,0x62, +0x5a,0x49,0xde,0xb1, 0x1b,0x67,0x25,0xba, 0x0e,0x98,0x45,0xea, 0xc0,0xe1,0x5d,0xfe, +0x75,0x02,0xc3,0x2f, 0xf0,0x12,0x81,0x4c, 0x97,0xa3,0x8d,0x46, 0xf9,0xc6,0x6b,0xd3, +0x5f,0xe7,0x03,0x8f, 0x9c,0x95,0x15,0x92, 0x7a,0xeb,0xbf,0x6d, 0x59,0xda,0x95,0x52, +0x83,0x2d,0xd4,0xbe, 0x21,0xd3,0x58,0x74, 0x69,0x29,0x49,0xe0, 0xc8,0x44,0x8e,0xc9, +0x89,0x6a,0x75,0xc2, 0x79,0x78,0xf4,0x8e, 0x3e,0x6b,0x99,0x58, 0x71,0xdd,0x27,0xb9, +0x4f,0xb6,0xbe,0xe1, 0xad,0x17,0xf0,0x88, 0xac,0x66,0xc9,0x20, 0x3a,0xb4,0x7d,0xce, +0x4a,0x18,0x63,0xdf, 0x31,0x82,0xe5,0x1a, 0x33,0x60,0x97,0x51, 0x7f,0x45,0x62,0x53, +0x77,0xe0,0xb1,0x64, 0xae,0x84,0xbb,0x6b, 0xa0,0x1c,0xfe,0x81, 0x2b,0x94,0xf9,0x08, +0x68,0x58,0x70,0x48, 0xfd,0x19,0x8f,0x45, 0x6c,0x87,0x94,0xde, 0xf8,0xb7,0x52,0x7b, +0xd3,0x23,0xab,0x73, 0x02,0xe2,0x72,0x4b, 0x8f,0x57,0xe3,0x1f, 0xab,0x2a,0x66,0x55, +0x28,0x07,0xb2,0xeb, 0xc2,0x03,0x2f,0xb5, 0x7b,0x9a,0x86,0xc5, 0x08,0xa5,0xd3,0x37, +0x87,0xf2,0x30,0x28, 0xa5,0xb2,0x23,0xbf, 0x6a,0xba,0x02,0x03, 0x82,0x5c,0xed,0x16, +0x1c,0x2b,0x8a,0xcf, 0xb4,0x92,0xa7,0x79, 0xf2,0xf0,0xf3,0x07, 0xe2,0xa1,0x4e,0x69, +0xf4,0xcd,0x65,0xda, 0xbe,0xd5,0x06,0x05, 0x62,0x1f,0xd1,0x34, 0xfe,0x8a,0xc4,0xa6, +0x53,0x9d,0x34,0x2e, 0x55,0xa0,0xa2,0xf3, 0xe1,0x32,0x05,0x8a, 0xeb,0x75,0xa4,0xf6, +0xec,0x39,0x0b,0x83, 0xef,0xaa,0x40,0x60, 0x9f,0x06,0x5e,0x71, 0x10,0x51,0xbd,0x6e, +0x8a,0xf9,0x3e,0x21, 0x06,0x3d,0x96,0xdd, 0x05,0xae,0xdd,0x3e, 0xbd,0x46,0x4d,0xe6, +0x8d,0xb5,0x91,0x54, 0x5d,0x05,0x71,0xc4, 0xd4,0x6f,0x04,0x06, 0x15,0xff,0x60,0x50, +0xfb,0x24,0x19,0x98, 0xe9,0x97,0xd6,0xbd, 0x43,0xcc,0x89,0x40, 0x9e,0x77,0x67,0xd9, +0x42,0xbd,0xb0,0xe8, 0x8b,0x88,0x07,0x89, 0x5b,0x38,0xe7,0x19, 0xee,0xdb,0x79,0xc8, +0x0a,0x47,0xa1,0x7c, 0x0f,0xe9,0x7c,0x42, 0x1e,0xc9,0xf8,0x84, 0x00,0x00,0x00,0x00, +0x86,0x83,0x09,0x80, 0xed,0x48,0x32,0x2b, 0x70,0xac,0x1e,0x11, 0x72,0x4e,0x6c,0x5a, +0xff,0xfb,0xfd,0x0e, 0x38,0x56,0x0f,0x85, 0xd5,0x1e,0x3d,0xae, 
0x39,0x27,0x36,0x2d, +0xd9,0x64,0x0a,0x0f, 0xa6,0x21,0x68,0x5c, 0x54,0xd1,0x9b,0x5b, 0x2e,0x3a,0x24,0x36, +0x67,0xb1,0x0c,0x0a, 0xe7,0x0f,0x93,0x57, 0x96,0xd2,0xb4,0xee, 0x91,0x9e,0x1b,0x9b, +0xc5,0x4f,0x80,0xc0, 0x20,0xa2,0x61,0xdc, 0x4b,0x69,0x5a,0x77, 0x1a,0x16,0x1c,0x12, +0xba,0x0a,0xe2,0x93, 0x2a,0xe5,0xc0,0xa0, 0xe0,0x43,0x3c,0x22, 0x17,0x1d,0x12,0x1b, +0x0d,0x0b,0x0e,0x09, 0xc7,0xad,0xf2,0x8b, 0xa8,0xb9,0x2d,0xb6, 0xa9,0xc8,0x14,0x1e, +0x19,0x85,0x57,0xf1, 0x07,0x4c,0xaf,0x75, 0xdd,0xbb,0xee,0x99, 0x60,0xfd,0xa3,0x7f, +0x26,0x9f,0xf7,0x01, 0xf5,0xbc,0x5c,0x72, 0x3b,0xc5,0x44,0x66, 0x7e,0x34,0x5b,0xfb, +0x29,0x76,0x8b,0x43, 0xc6,0xdc,0xcb,0x23, 0xfc,0x68,0xb6,0xed, 0xf1,0x63,0xb8,0xe4, +0xdc,0xca,0xd7,0x31, 0x85,0x10,0x42,0x63, 0x22,0x40,0x13,0x97, 0x11,0x20,0x84,0xc6, +0x24,0x7d,0x85,0x4a, 0x3d,0xf8,0xd2,0xbb, 0x32,0x11,0xae,0xf9, 0xa1,0x6d,0xc7,0x29, +0x2f,0x4b,0x1d,0x9e, 0x30,0xf3,0xdc,0xb2, 0x52,0xec,0x0d,0x86, 0xe3,0xd0,0x77,0xc1, +0x16,0x6c,0x2b,0xb3, 0xb9,0x99,0xa9,0x70, 0x48,0xfa,0x11,0x94, 0x64,0x22,0x47,0xe9, +0x8c,0xc4,0xa8,0xfc, 0x3f,0x1a,0xa0,0xf0, 0x2c,0xd8,0x56,0x7d, 0x90,0xef,0x22,0x33, +0x4e,0xc7,0x87,0x49, 0xd1,0xc1,0xd9,0x38, 0xa2,0xfe,0x8c,0xca, 0x0b,0x36,0x98,0xd4, +0x81,0xcf,0xa6,0xf5, 0xde,0x28,0xa5,0x7a, 0x8e,0x26,0xda,0xb7, 0xbf,0xa4,0x3f,0xad, +0x9d,0xe4,0x2c,0x3a, 0x92,0x0d,0x50,0x78, 0xcc,0x9b,0x6a,0x5f, 0x46,0x62,0x54,0x7e, +0x13,0xc2,0xf6,0x8d, 0xb8,0xe8,0x90,0xd8, 0xf7,0x5e,0x2e,0x39, 0xaf,0xf5,0x82,0xc3, +0x80,0xbe,0x9f,0x5d, 0x93,0x7c,0x69,0xd0, 0x2d,0xa9,0x6f,0xd5, 0x12,0xb3,0xcf,0x25, +0x99,0x3b,0xc8,0xac, 0x7d,0xa7,0x10,0x18, 0x63,0x6e,0xe8,0x9c, 0xbb,0x7b,0xdb,0x3b, +0x78,0x09,0xcd,0x26, 0x18,0xf4,0x6e,0x59, 0xb7,0x01,0xec,0x9a, 0x9a,0xa8,0x83,0x4f, +0x6e,0x65,0xe6,0x95, 0xe6,0x7e,0xaa,0xff, 0xcf,0x08,0x21,0xbc, 0xe8,0xe6,0xef,0x15, +0x9b,0xd9,0xba,0xe7, 0x36,0xce,0x4a,0x6f, 0x09,0xd4,0xea,0x9f, 0x7c,0xd6,0x29,0xb0, +0xb2,0xaf,0x31,0xa4, 0x23,0x31,0x2a,0x3f, 0x94,0x30,0xc6,0xa5, 0x66,0xc0,0x35,0xa2, +0xbc,0x37,0x74,0x4e, 
0xca,0xa6,0xfc,0x82, 0xd0,0xb0,0xe0,0x90, 0xd8,0x15,0x33,0xa7, +0x98,0x4a,0xf1,0x04, 0xda,0xf7,0x41,0xec, 0x50,0x0e,0x7f,0xcd, 0xf6,0x2f,0x17,0x91, +0xd6,0x8d,0x76,0x4d, 0xb0,0x4d,0x43,0xef, 0x4d,0x54,0xcc,0xaa, 0x04,0xdf,0xe4,0x96, +0xb5,0xe3,0x9e,0xd1, 0x88,0x1b,0x4c,0x6a, 0x1f,0xb8,0xc1,0x2c, 0x51,0x7f,0x46,0x65, +0xea,0x04,0x9d,0x5e, 0x35,0x5d,0x01,0x8c, 0x74,0x73,0xfa,0x87, 0x41,0x2e,0xfb,0x0b, +0x1d,0x5a,0xb3,0x67, 0xd2,0x52,0x92,0xdb, 0x56,0x33,0xe9,0x10, 0x47,0x13,0x6d,0xd6, +0x61,0x8c,0x9a,0xd7, 0x0c,0x7a,0x37,0xa1, 0x14,0x8e,0x59,0xf8, 0x3c,0x89,0xeb,0x13, +0x27,0xee,0xce,0xa9, 0xc9,0x35,0xb7,0x61, 0xe5,0xed,0xe1,0x1c, 0xb1,0x3c,0x7a,0x47, +0xdf,0x59,0x9c,0xd2, 0x73,0x3f,0x55,0xf2, 0xce,0x79,0x18,0x14, 0x37,0xbf,0x73,0xc7, +0xcd,0xea,0x53,0xf7, 0xaa,0x5b,0x5f,0xfd, 0x6f,0x14,0xdf,0x3d, 0xdb,0x86,0x78,0x44, +0xf3,0x81,0xca,0xaf, 0xc4,0x3e,0xb9,0x68, 0x34,0x2c,0x38,0x24, 0x40,0x5f,0xc2,0xa3, +0xc3,0x72,0x16,0x1d, 0x25,0x0c,0xbc,0xe2, 0x49,0x8b,0x28,0x3c, 0x95,0x41,0xff,0x0d, +0x01,0x71,0x39,0xa8, 0xb3,0xde,0x08,0x0c, 0xe4,0x9c,0xd8,0xb4, 0xc1,0x90,0x64,0x56, +0x84,0x61,0x7b,0xcb, 0xb6,0x70,0xd5,0x32, 0x5c,0x74,0x48,0x6c, 0x57,0x42,0xd0,0xb8 +},{ +0xf4,0xa7,0x50,0x51, 0x41,0x65,0x53,0x7e, 0x17,0xa4,0xc3,0x1a, 0x27,0x5e,0x96,0x3a, +0xab,0x6b,0xcb,0x3b, 0x9d,0x45,0xf1,0x1f, 0xfa,0x58,0xab,0xac, 0xe3,0x03,0x93,0x4b, +0x30,0xfa,0x55,0x20, 0x76,0x6d,0xf6,0xad, 0xcc,0x76,0x91,0x88, 0x02,0x4c,0x25,0xf5, +0xe5,0xd7,0xfc,0x4f, 0x2a,0xcb,0xd7,0xc5, 0x35,0x44,0x80,0x26, 0x62,0xa3,0x8f,0xb5, +0xb1,0x5a,0x49,0xde, 0xba,0x1b,0x67,0x25, 0xea,0x0e,0x98,0x45, 0xfe,0xc0,0xe1,0x5d, +0x2f,0x75,0x02,0xc3, 0x4c,0xf0,0x12,0x81, 0x46,0x97,0xa3,0x8d, 0xd3,0xf9,0xc6,0x6b, +0x8f,0x5f,0xe7,0x03, 0x92,0x9c,0x95,0x15, 0x6d,0x7a,0xeb,0xbf, 0x52,0x59,0xda,0x95, +0xbe,0x83,0x2d,0xd4, 0x74,0x21,0xd3,0x58, 0xe0,0x69,0x29,0x49, 0xc9,0xc8,0x44,0x8e, +0xc2,0x89,0x6a,0x75, 0x8e,0x79,0x78,0xf4, 0x58,0x3e,0x6b,0x99, 0xb9,0x71,0xdd,0x27, +0xe1,0x4f,0xb6,0xbe, 0x88,0xad,0x17,0xf0, 
0x20,0xac,0x66,0xc9, 0xce,0x3a,0xb4,0x7d, +0xdf,0x4a,0x18,0x63, 0x1a,0x31,0x82,0xe5, 0x51,0x33,0x60,0x97, 0x53,0x7f,0x45,0x62, +0x64,0x77,0xe0,0xb1, 0x6b,0xae,0x84,0xbb, 0x81,0xa0,0x1c,0xfe, 0x08,0x2b,0x94,0xf9, +0x48,0x68,0x58,0x70, 0x45,0xfd,0x19,0x8f, 0xde,0x6c,0x87,0x94, 0x7b,0xf8,0xb7,0x52, +0x73,0xd3,0x23,0xab, 0x4b,0x02,0xe2,0x72, 0x1f,0x8f,0x57,0xe3, 0x55,0xab,0x2a,0x66, +0xeb,0x28,0x07,0xb2, 0xb5,0xc2,0x03,0x2f, 0xc5,0x7b,0x9a,0x86, 0x37,0x08,0xa5,0xd3, +0x28,0x87,0xf2,0x30, 0xbf,0xa5,0xb2,0x23, 0x03,0x6a,0xba,0x02, 0x16,0x82,0x5c,0xed, +0xcf,0x1c,0x2b,0x8a, 0x79,0xb4,0x92,0xa7, 0x07,0xf2,0xf0,0xf3, 0x69,0xe2,0xa1,0x4e, +0xda,0xf4,0xcd,0x65, 0x05,0xbe,0xd5,0x06, 0x34,0x62,0x1f,0xd1, 0xa6,0xfe,0x8a,0xc4, +0x2e,0x53,0x9d,0x34, 0xf3,0x55,0xa0,0xa2, 0x8a,0xe1,0x32,0x05, 0xf6,0xeb,0x75,0xa4, +0x83,0xec,0x39,0x0b, 0x60,0xef,0xaa,0x40, 0x71,0x9f,0x06,0x5e, 0x6e,0x10,0x51,0xbd, +0x21,0x8a,0xf9,0x3e, 0xdd,0x06,0x3d,0x96, 0x3e,0x05,0xae,0xdd, 0xe6,0xbd,0x46,0x4d, +0x54,0x8d,0xb5,0x91, 0xc4,0x5d,0x05,0x71, 0x06,0xd4,0x6f,0x04, 0x50,0x15,0xff,0x60, +0x98,0xfb,0x24,0x19, 0xbd,0xe9,0x97,0xd6, 0x40,0x43,0xcc,0x89, 0xd9,0x9e,0x77,0x67, +0xe8,0x42,0xbd,0xb0, 0x89,0x8b,0x88,0x07, 0x19,0x5b,0x38,0xe7, 0xc8,0xee,0xdb,0x79, +0x7c,0x0a,0x47,0xa1, 0x42,0x0f,0xe9,0x7c, 0x84,0x1e,0xc9,0xf8, 0x00,0x00,0x00,0x00, +0x80,0x86,0x83,0x09, 0x2b,0xed,0x48,0x32, 0x11,0x70,0xac,0x1e, 0x5a,0x72,0x4e,0x6c, +0x0e,0xff,0xfb,0xfd, 0x85,0x38,0x56,0x0f, 0xae,0xd5,0x1e,0x3d, 0x2d,0x39,0x27,0x36, +0x0f,0xd9,0x64,0x0a, 0x5c,0xa6,0x21,0x68, 0x5b,0x54,0xd1,0x9b, 0x36,0x2e,0x3a,0x24, +0x0a,0x67,0xb1,0x0c, 0x57,0xe7,0x0f,0x93, 0xee,0x96,0xd2,0xb4, 0x9b,0x91,0x9e,0x1b, +0xc0,0xc5,0x4f,0x80, 0xdc,0x20,0xa2,0x61, 0x77,0x4b,0x69,0x5a, 0x12,0x1a,0x16,0x1c, +0x93,0xba,0x0a,0xe2, 0xa0,0x2a,0xe5,0xc0, 0x22,0xe0,0x43,0x3c, 0x1b,0x17,0x1d,0x12, +0x09,0x0d,0x0b,0x0e, 0x8b,0xc7,0xad,0xf2, 0xb6,0xa8,0xb9,0x2d, 0x1e,0xa9,0xc8,0x14, +0xf1,0x19,0x85,0x57, 0x75,0x07,0x4c,0xaf, 0x99,0xdd,0xbb,0xee, 0x7f,0x60,0xfd,0xa3, 
+0x01,0x26,0x9f,0xf7, 0x72,0xf5,0xbc,0x5c, 0x66,0x3b,0xc5,0x44, 0xfb,0x7e,0x34,0x5b, +0x43,0x29,0x76,0x8b, 0x23,0xc6,0xdc,0xcb, 0xed,0xfc,0x68,0xb6, 0xe4,0xf1,0x63,0xb8, +0x31,0xdc,0xca,0xd7, 0x63,0x85,0x10,0x42, 0x97,0x22,0x40,0x13, 0xc6,0x11,0x20,0x84, +0x4a,0x24,0x7d,0x85, 0xbb,0x3d,0xf8,0xd2, 0xf9,0x32,0x11,0xae, 0x29,0xa1,0x6d,0xc7, +0x9e,0x2f,0x4b,0x1d, 0xb2,0x30,0xf3,0xdc, 0x86,0x52,0xec,0x0d, 0xc1,0xe3,0xd0,0x77, +0xb3,0x16,0x6c,0x2b, 0x70,0xb9,0x99,0xa9, 0x94,0x48,0xfa,0x11, 0xe9,0x64,0x22,0x47, +0xfc,0x8c,0xc4,0xa8, 0xf0,0x3f,0x1a,0xa0, 0x7d,0x2c,0xd8,0x56, 0x33,0x90,0xef,0x22, +0x49,0x4e,0xc7,0x87, 0x38,0xd1,0xc1,0xd9, 0xca,0xa2,0xfe,0x8c, 0xd4,0x0b,0x36,0x98, +0xf5,0x81,0xcf,0xa6, 0x7a,0xde,0x28,0xa5, 0xb7,0x8e,0x26,0xda, 0xad,0xbf,0xa4,0x3f, +0x3a,0x9d,0xe4,0x2c, 0x78,0x92,0x0d,0x50, 0x5f,0xcc,0x9b,0x6a, 0x7e,0x46,0x62,0x54, +0x8d,0x13,0xc2,0xf6, 0xd8,0xb8,0xe8,0x90, 0x39,0xf7,0x5e,0x2e, 0xc3,0xaf,0xf5,0x82, +0x5d,0x80,0xbe,0x9f, 0xd0,0x93,0x7c,0x69, 0xd5,0x2d,0xa9,0x6f, 0x25,0x12,0xb3,0xcf, +0xac,0x99,0x3b,0xc8, 0x18,0x7d,0xa7,0x10, 0x9c,0x63,0x6e,0xe8, 0x3b,0xbb,0x7b,0xdb, +0x26,0x78,0x09,0xcd, 0x59,0x18,0xf4,0x6e, 0x9a,0xb7,0x01,0xec, 0x4f,0x9a,0xa8,0x83, +0x95,0x6e,0x65,0xe6, 0xff,0xe6,0x7e,0xaa, 0xbc,0xcf,0x08,0x21, 0x15,0xe8,0xe6,0xef, +0xe7,0x9b,0xd9,0xba, 0x6f,0x36,0xce,0x4a, 0x9f,0x09,0xd4,0xea, 0xb0,0x7c,0xd6,0x29, +0xa4,0xb2,0xaf,0x31, 0x3f,0x23,0x31,0x2a, 0xa5,0x94,0x30,0xc6, 0xa2,0x66,0xc0,0x35, +0x4e,0xbc,0x37,0x74, 0x82,0xca,0xa6,0xfc, 0x90,0xd0,0xb0,0xe0, 0xa7,0xd8,0x15,0x33, +0x04,0x98,0x4a,0xf1, 0xec,0xda,0xf7,0x41, 0xcd,0x50,0x0e,0x7f, 0x91,0xf6,0x2f,0x17, +0x4d,0xd6,0x8d,0x76, 0xef,0xb0,0x4d,0x43, 0xaa,0x4d,0x54,0xcc, 0x96,0x04,0xdf,0xe4, +0xd1,0xb5,0xe3,0x9e, 0x6a,0x88,0x1b,0x4c, 0x2c,0x1f,0xb8,0xc1, 0x65,0x51,0x7f,0x46, +0x5e,0xea,0x04,0x9d, 0x8c,0x35,0x5d,0x01, 0x87,0x74,0x73,0xfa, 0x0b,0x41,0x2e,0xfb, +0x67,0x1d,0x5a,0xb3, 0xdb,0xd2,0x52,0x92, 0x10,0x56,0x33,0xe9, 0xd6,0x47,0x13,0x6d, +0xd7,0x61,0x8c,0x9a, 0xa1,0x0c,0x7a,0x37, 
0xf8,0x14,0x8e,0x59, 0x13,0x3c,0x89,0xeb, +0xa9,0x27,0xee,0xce, 0x61,0xc9,0x35,0xb7, 0x1c,0xe5,0xed,0xe1, 0x47,0xb1,0x3c,0x7a, +0xd2,0xdf,0x59,0x9c, 0xf2,0x73,0x3f,0x55, 0x14,0xce,0x79,0x18, 0xc7,0x37,0xbf,0x73, +0xf7,0xcd,0xea,0x53, 0xfd,0xaa,0x5b,0x5f, 0x3d,0x6f,0x14,0xdf, 0x44,0xdb,0x86,0x78, +0xaf,0xf3,0x81,0xca, 0x68,0xc4,0x3e,0xb9, 0x24,0x34,0x2c,0x38, 0xa3,0x40,0x5f,0xc2, +0x1d,0xc3,0x72,0x16, 0xe2,0x25,0x0c,0xbc, 0x3c,0x49,0x8b,0x28, 0x0d,0x95,0x41,0xff, +0xa8,0x01,0x71,0x39, 0x0c,0xb3,0xde,0x08, 0xb4,0xe4,0x9c,0xd8, 0x56,0xc1,0x90,0x64, +0xcb,0x84,0x61,0x7b, 0x32,0xb6,0x70,0xd5, 0x6c,0x5c,0x74,0x48, 0xb8,0x57,0x42,0xd0 +}}; + +SYMCRYPT_ALIGN_AT(1024) const BYTE SymCryptAesInvMatrixMult[4][256][4] = {{ // For computing decryption round keys +0x00,0x00,0x00,0x00, 0x0e,0x09,0x0d,0x0b, 0x1c,0x12,0x1a,0x16, 0x12,0x1b,0x17,0x1d, +0x38,0x24,0x34,0x2c, 0x36,0x2d,0x39,0x27, 0x24,0x36,0x2e,0x3a, 0x2a,0x3f,0x23,0x31, +0x70,0x48,0x68,0x58, 0x7e,0x41,0x65,0x53, 0x6c,0x5a,0x72,0x4e, 0x62,0x53,0x7f,0x45, +0x48,0x6c,0x5c,0x74, 0x46,0x65,0x51,0x7f, 0x54,0x7e,0x46,0x62, 0x5a,0x77,0x4b,0x69, +0xe0,0x90,0xd0,0xb0, 0xee,0x99,0xdd,0xbb, 0xfc,0x82,0xca,0xa6, 0xf2,0x8b,0xc7,0xad, +0xd8,0xb4,0xe4,0x9c, 0xd6,0xbd,0xe9,0x97, 0xc4,0xa6,0xfe,0x8a, 0xca,0xaf,0xf3,0x81, +0x90,0xd8,0xb8,0xe8, 0x9e,0xd1,0xb5,0xe3, 0x8c,0xca,0xa2,0xfe, 0x82,0xc3,0xaf,0xf5, +0xa8,0xfc,0x8c,0xc4, 0xa6,0xf5,0x81,0xcf, 0xb4,0xee,0x96,0xd2, 0xba,0xe7,0x9b,0xd9, +0xdb,0x3b,0xbb,0x7b, 0xd5,0x32,0xb6,0x70, 0xc7,0x29,0xa1,0x6d, 0xc9,0x20,0xac,0x66, +0xe3,0x1f,0x8f,0x57, 0xed,0x16,0x82,0x5c, 0xff,0x0d,0x95,0x41, 0xf1,0x04,0x98,0x4a, +0xab,0x73,0xd3,0x23, 0xa5,0x7a,0xde,0x28, 0xb7,0x61,0xc9,0x35, 0xb9,0x68,0xc4,0x3e, +0x93,0x57,0xe7,0x0f, 0x9d,0x5e,0xea,0x04, 0x8f,0x45,0xfd,0x19, 0x81,0x4c,0xf0,0x12, +0x3b,0xab,0x6b,0xcb, 0x35,0xa2,0x66,0xc0, 0x27,0xb9,0x71,0xdd, 0x29,0xb0,0x7c,0xd6, +0x03,0x8f,0x5f,0xe7, 0x0d,0x86,0x52,0xec, 0x1f,0x9d,0x45,0xf1, 0x11,0x94,0x48,0xfa, +0x4b,0xe3,0x03,0x93, 0x45,0xea,0x0e,0x98, 
0x57,0xf1,0x19,0x85, 0x59,0xf8,0x14,0x8e, +0x73,0xc7,0x37,0xbf, 0x7d,0xce,0x3a,0xb4, 0x6f,0xd5,0x2d,0xa9, 0x61,0xdc,0x20,0xa2, +0xad,0x76,0x6d,0xf6, 0xa3,0x7f,0x60,0xfd, 0xb1,0x64,0x77,0xe0, 0xbf,0x6d,0x7a,0xeb, +0x95,0x52,0x59,0xda, 0x9b,0x5b,0x54,0xd1, 0x89,0x40,0x43,0xcc, 0x87,0x49,0x4e,0xc7, +0xdd,0x3e,0x05,0xae, 0xd3,0x37,0x08,0xa5, 0xc1,0x2c,0x1f,0xb8, 0xcf,0x25,0x12,0xb3, +0xe5,0x1a,0x31,0x82, 0xeb,0x13,0x3c,0x89, 0xf9,0x08,0x2b,0x94, 0xf7,0x01,0x26,0x9f, +0x4d,0xe6,0xbd,0x46, 0x43,0xef,0xb0,0x4d, 0x51,0xf4,0xa7,0x50, 0x5f,0xfd,0xaa,0x5b, +0x75,0xc2,0x89,0x6a, 0x7b,0xcb,0x84,0x61, 0x69,0xd0,0x93,0x7c, 0x67,0xd9,0x9e,0x77, +0x3d,0xae,0xd5,0x1e, 0x33,0xa7,0xd8,0x15, 0x21,0xbc,0xcf,0x08, 0x2f,0xb5,0xc2,0x03, +0x05,0x8a,0xe1,0x32, 0x0b,0x83,0xec,0x39, 0x19,0x98,0xfb,0x24, 0x17,0x91,0xf6,0x2f, +0x76,0x4d,0xd6,0x8d, 0x78,0x44,0xdb,0x86, 0x6a,0x5f,0xcc,0x9b, 0x64,0x56,0xc1,0x90, +0x4e,0x69,0xe2,0xa1, 0x40,0x60,0xef,0xaa, 0x52,0x7b,0xf8,0xb7, 0x5c,0x72,0xf5,0xbc, +0x06,0x05,0xbe,0xd5, 0x08,0x0c,0xb3,0xde, 0x1a,0x17,0xa4,0xc3, 0x14,0x1e,0xa9,0xc8, +0x3e,0x21,0x8a,0xf9, 0x30,0x28,0x87,0xf2, 0x22,0x33,0x90,0xef, 0x2c,0x3a,0x9d,0xe4, +0x96,0xdd,0x06,0x3d, 0x98,0xd4,0x0b,0x36, 0x8a,0xcf,0x1c,0x2b, 0x84,0xc6,0x11,0x20, +0xae,0xf9,0x32,0x11, 0xa0,0xf0,0x3f,0x1a, 0xb2,0xeb,0x28,0x07, 0xbc,0xe2,0x25,0x0c, +0xe6,0x95,0x6e,0x65, 0xe8,0x9c,0x63,0x6e, 0xfa,0x87,0x74,0x73, 0xf4,0x8e,0x79,0x78, +0xde,0xb1,0x5a,0x49, 0xd0,0xb8,0x57,0x42, 0xc2,0xa3,0x40,0x5f, 0xcc,0xaa,0x4d,0x54, +0x41,0xec,0xda,0xf7, 0x4f,0xe5,0xd7,0xfc, 0x5d,0xfe,0xc0,0xe1, 0x53,0xf7,0xcd,0xea, +0x79,0xc8,0xee,0xdb, 0x77,0xc1,0xe3,0xd0, 0x65,0xda,0xf4,0xcd, 0x6b,0xd3,0xf9,0xc6, +0x31,0xa4,0xb2,0xaf, 0x3f,0xad,0xbf,0xa4, 0x2d,0xb6,0xa8,0xb9, 0x23,0xbf,0xa5,0xb2, +0x09,0x80,0x86,0x83, 0x07,0x89,0x8b,0x88, 0x15,0x92,0x9c,0x95, 0x1b,0x9b,0x91,0x9e, +0xa1,0x7c,0x0a,0x47, 0xaf,0x75,0x07,0x4c, 0xbd,0x6e,0x10,0x51, 0xb3,0x67,0x1d,0x5a, +0x99,0x58,0x3e,0x6b, 0x97,0x51,0x33,0x60, 0x85,0x4a,0x24,0x7d, 0x8b,0x43,0x29,0x76, 
+0xd1,0x34,0x62,0x1f, 0xdf,0x3d,0x6f,0x14, 0xcd,0x26,0x78,0x09, 0xc3,0x2f,0x75,0x02, +0xe9,0x10,0x56,0x33, 0xe7,0x19,0x5b,0x38, 0xf5,0x02,0x4c,0x25, 0xfb,0x0b,0x41,0x2e, +0x9a,0xd7,0x61,0x8c, 0x94,0xde,0x6c,0x87, 0x86,0xc5,0x7b,0x9a, 0x88,0xcc,0x76,0x91, +0xa2,0xf3,0x55,0xa0, 0xac,0xfa,0x58,0xab, 0xbe,0xe1,0x4f,0xb6, 0xb0,0xe8,0x42,0xbd, +0xea,0x9f,0x09,0xd4, 0xe4,0x96,0x04,0xdf, 0xf6,0x8d,0x13,0xc2, 0xf8,0x84,0x1e,0xc9, +0xd2,0xbb,0x3d,0xf8, 0xdc,0xb2,0x30,0xf3, 0xce,0xa9,0x27,0xee, 0xc0,0xa0,0x2a,0xe5, +0x7a,0x47,0xb1,0x3c, 0x74,0x4e,0xbc,0x37, 0x66,0x55,0xab,0x2a, 0x68,0x5c,0xa6,0x21, +0x42,0x63,0x85,0x10, 0x4c,0x6a,0x88,0x1b, 0x5e,0x71,0x9f,0x06, 0x50,0x78,0x92,0x0d, +0x0a,0x0f,0xd9,0x64, 0x04,0x06,0xd4,0x6f, 0x16,0x1d,0xc3,0x72, 0x18,0x14,0xce,0x79, +0x32,0x2b,0xed,0x48, 0x3c,0x22,0xe0,0x43, 0x2e,0x39,0xf7,0x5e, 0x20,0x30,0xfa,0x55, +0xec,0x9a,0xb7,0x01, 0xe2,0x93,0xba,0x0a, 0xf0,0x88,0xad,0x17, 0xfe,0x81,0xa0,0x1c, +0xd4,0xbe,0x83,0x2d, 0xda,0xb7,0x8e,0x26, 0xc8,0xac,0x99,0x3b, 0xc6,0xa5,0x94,0x30, +0x9c,0xd2,0xdf,0x59, 0x92,0xdb,0xd2,0x52, 0x80,0xc0,0xc5,0x4f, 0x8e,0xc9,0xc8,0x44, +0xa4,0xf6,0xeb,0x75, 0xaa,0xff,0xe6,0x7e, 0xb8,0xe4,0xf1,0x63, 0xb6,0xed,0xfc,0x68, +0x0c,0x0a,0x67,0xb1, 0x02,0x03,0x6a,0xba, 0x10,0x18,0x7d,0xa7, 0x1e,0x11,0x70,0xac, +0x34,0x2e,0x53,0x9d, 0x3a,0x27,0x5e,0x96, 0x28,0x3c,0x49,0x8b, 0x26,0x35,0x44,0x80, +0x7c,0x42,0x0f,0xe9, 0x72,0x4b,0x02,0xe2, 0x60,0x50,0x15,0xff, 0x6e,0x59,0x18,0xf4, +0x44,0x66,0x3b,0xc5, 0x4a,0x6f,0x36,0xce, 0x58,0x74,0x21,0xd3, 0x56,0x7d,0x2c,0xd8, +0x37,0xa1,0x0c,0x7a, 0x39,0xa8,0x01,0x71, 0x2b,0xb3,0x16,0x6c, 0x25,0xba,0x1b,0x67, +0x0f,0x85,0x38,0x56, 0x01,0x8c,0x35,0x5d, 0x13,0x97,0x22,0x40, 0x1d,0x9e,0x2f,0x4b, +0x47,0xe9,0x64,0x22, 0x49,0xe0,0x69,0x29, 0x5b,0xfb,0x7e,0x34, 0x55,0xf2,0x73,0x3f, +0x7f,0xcd,0x50,0x0e, 0x71,0xc4,0x5d,0x05, 0x63,0xdf,0x4a,0x18, 0x6d,0xd6,0x47,0x13, +0xd7,0x31,0xdc,0xca, 0xd9,0x38,0xd1,0xc1, 0xcb,0x23,0xc6,0xdc, 0xc5,0x2a,0xcb,0xd7, +0xef,0x15,0xe8,0xe6, 0xe1,0x1c,0xe5,0xed, 
0xf3,0x07,0xf2,0xf0, 0xfd,0x0e,0xff,0xfb, +0xa7,0x79,0xb4,0x92, 0xa9,0x70,0xb9,0x99, 0xbb,0x6b,0xae,0x84, 0xb5,0x62,0xa3,0x8f, +0x9f,0x5d,0x80,0xbe, 0x91,0x54,0x8d,0xb5, 0x83,0x4f,0x9a,0xa8, 0x8d,0x46,0x97,0xa3 +},{ +0x00,0x00,0x00,0x00, 0x0b,0x0e,0x09,0x0d, 0x16,0x1c,0x12,0x1a, 0x1d,0x12,0x1b,0x17, +0x2c,0x38,0x24,0x34, 0x27,0x36,0x2d,0x39, 0x3a,0x24,0x36,0x2e, 0x31,0x2a,0x3f,0x23, +0x58,0x70,0x48,0x68, 0x53,0x7e,0x41,0x65, 0x4e,0x6c,0x5a,0x72, 0x45,0x62,0x53,0x7f, +0x74,0x48,0x6c,0x5c, 0x7f,0x46,0x65,0x51, 0x62,0x54,0x7e,0x46, 0x69,0x5a,0x77,0x4b, +0xb0,0xe0,0x90,0xd0, 0xbb,0xee,0x99,0xdd, 0xa6,0xfc,0x82,0xca, 0xad,0xf2,0x8b,0xc7, +0x9c,0xd8,0xb4,0xe4, 0x97,0xd6,0xbd,0xe9, 0x8a,0xc4,0xa6,0xfe, 0x81,0xca,0xaf,0xf3, +0xe8,0x90,0xd8,0xb8, 0xe3,0x9e,0xd1,0xb5, 0xfe,0x8c,0xca,0xa2, 0xf5,0x82,0xc3,0xaf, +0xc4,0xa8,0xfc,0x8c, 0xcf,0xa6,0xf5,0x81, 0xd2,0xb4,0xee,0x96, 0xd9,0xba,0xe7,0x9b, +0x7b,0xdb,0x3b,0xbb, 0x70,0xd5,0x32,0xb6, 0x6d,0xc7,0x29,0xa1, 0x66,0xc9,0x20,0xac, +0x57,0xe3,0x1f,0x8f, 0x5c,0xed,0x16,0x82, 0x41,0xff,0x0d,0x95, 0x4a,0xf1,0x04,0x98, +0x23,0xab,0x73,0xd3, 0x28,0xa5,0x7a,0xde, 0x35,0xb7,0x61,0xc9, 0x3e,0xb9,0x68,0xc4, +0x0f,0x93,0x57,0xe7, 0x04,0x9d,0x5e,0xea, 0x19,0x8f,0x45,0xfd, 0x12,0x81,0x4c,0xf0, +0xcb,0x3b,0xab,0x6b, 0xc0,0x35,0xa2,0x66, 0xdd,0x27,0xb9,0x71, 0xd6,0x29,0xb0,0x7c, +0xe7,0x03,0x8f,0x5f, 0xec,0x0d,0x86,0x52, 0xf1,0x1f,0x9d,0x45, 0xfa,0x11,0x94,0x48, +0x93,0x4b,0xe3,0x03, 0x98,0x45,0xea,0x0e, 0x85,0x57,0xf1,0x19, 0x8e,0x59,0xf8,0x14, +0xbf,0x73,0xc7,0x37, 0xb4,0x7d,0xce,0x3a, 0xa9,0x6f,0xd5,0x2d, 0xa2,0x61,0xdc,0x20, +0xf6,0xad,0x76,0x6d, 0xfd,0xa3,0x7f,0x60, 0xe0,0xb1,0x64,0x77, 0xeb,0xbf,0x6d,0x7a, +0xda,0x95,0x52,0x59, 0xd1,0x9b,0x5b,0x54, 0xcc,0x89,0x40,0x43, 0xc7,0x87,0x49,0x4e, +0xae,0xdd,0x3e,0x05, 0xa5,0xd3,0x37,0x08, 0xb8,0xc1,0x2c,0x1f, 0xb3,0xcf,0x25,0x12, +0x82,0xe5,0x1a,0x31, 0x89,0xeb,0x13,0x3c, 0x94,0xf9,0x08,0x2b, 0x9f,0xf7,0x01,0x26, +0x46,0x4d,0xe6,0xbd, 0x4d,0x43,0xef,0xb0, 0x50,0x51,0xf4,0xa7, 
0x5b,0x5f,0xfd,0xaa, +0x6a,0x75,0xc2,0x89, 0x61,0x7b,0xcb,0x84, 0x7c,0x69,0xd0,0x93, 0x77,0x67,0xd9,0x9e, +0x1e,0x3d,0xae,0xd5, 0x15,0x33,0xa7,0xd8, 0x08,0x21,0xbc,0xcf, 0x03,0x2f,0xb5,0xc2, +0x32,0x05,0x8a,0xe1, 0x39,0x0b,0x83,0xec, 0x24,0x19,0x98,0xfb, 0x2f,0x17,0x91,0xf6, +0x8d,0x76,0x4d,0xd6, 0x86,0x78,0x44,0xdb, 0x9b,0x6a,0x5f,0xcc, 0x90,0x64,0x56,0xc1, +0xa1,0x4e,0x69,0xe2, 0xaa,0x40,0x60,0xef, 0xb7,0x52,0x7b,0xf8, 0xbc,0x5c,0x72,0xf5, +0xd5,0x06,0x05,0xbe, 0xde,0x08,0x0c,0xb3, 0xc3,0x1a,0x17,0xa4, 0xc8,0x14,0x1e,0xa9, +0xf9,0x3e,0x21,0x8a, 0xf2,0x30,0x28,0x87, 0xef,0x22,0x33,0x90, 0xe4,0x2c,0x3a,0x9d, +0x3d,0x96,0xdd,0x06, 0x36,0x98,0xd4,0x0b, 0x2b,0x8a,0xcf,0x1c, 0x20,0x84,0xc6,0x11, +0x11,0xae,0xf9,0x32, 0x1a,0xa0,0xf0,0x3f, 0x07,0xb2,0xeb,0x28, 0x0c,0xbc,0xe2,0x25, +0x65,0xe6,0x95,0x6e, 0x6e,0xe8,0x9c,0x63, 0x73,0xfa,0x87,0x74, 0x78,0xf4,0x8e,0x79, +0x49,0xde,0xb1,0x5a, 0x42,0xd0,0xb8,0x57, 0x5f,0xc2,0xa3,0x40, 0x54,0xcc,0xaa,0x4d, +0xf7,0x41,0xec,0xda, 0xfc,0x4f,0xe5,0xd7, 0xe1,0x5d,0xfe,0xc0, 0xea,0x53,0xf7,0xcd, +0xdb,0x79,0xc8,0xee, 0xd0,0x77,0xc1,0xe3, 0xcd,0x65,0xda,0xf4, 0xc6,0x6b,0xd3,0xf9, +0xaf,0x31,0xa4,0xb2, 0xa4,0x3f,0xad,0xbf, 0xb9,0x2d,0xb6,0xa8, 0xb2,0x23,0xbf,0xa5, +0x83,0x09,0x80,0x86, 0x88,0x07,0x89,0x8b, 0x95,0x15,0x92,0x9c, 0x9e,0x1b,0x9b,0x91, +0x47,0xa1,0x7c,0x0a, 0x4c,0xaf,0x75,0x07, 0x51,0xbd,0x6e,0x10, 0x5a,0xb3,0x67,0x1d, +0x6b,0x99,0x58,0x3e, 0x60,0x97,0x51,0x33, 0x7d,0x85,0x4a,0x24, 0x76,0x8b,0x43,0x29, +0x1f,0xd1,0x34,0x62, 0x14,0xdf,0x3d,0x6f, 0x09,0xcd,0x26,0x78, 0x02,0xc3,0x2f,0x75, +0x33,0xe9,0x10,0x56, 0x38,0xe7,0x19,0x5b, 0x25,0xf5,0x02,0x4c, 0x2e,0xfb,0x0b,0x41, +0x8c,0x9a,0xd7,0x61, 0x87,0x94,0xde,0x6c, 0x9a,0x86,0xc5,0x7b, 0x91,0x88,0xcc,0x76, +0xa0,0xa2,0xf3,0x55, 0xab,0xac,0xfa,0x58, 0xb6,0xbe,0xe1,0x4f, 0xbd,0xb0,0xe8,0x42, +0xd4,0xea,0x9f,0x09, 0xdf,0xe4,0x96,0x04, 0xc2,0xf6,0x8d,0x13, 0xc9,0xf8,0x84,0x1e, +0xf8,0xd2,0xbb,0x3d, 0xf3,0xdc,0xb2,0x30, 0xee,0xce,0xa9,0x27, 0xe5,0xc0,0xa0,0x2a, +0x3c,0x7a,0x47,0xb1, 
0x37,0x74,0x4e,0xbc, 0x2a,0x66,0x55,0xab, 0x21,0x68,0x5c,0xa6, +0x10,0x42,0x63,0x85, 0x1b,0x4c,0x6a,0x88, 0x06,0x5e,0x71,0x9f, 0x0d,0x50,0x78,0x92, +0x64,0x0a,0x0f,0xd9, 0x6f,0x04,0x06,0xd4, 0x72,0x16,0x1d,0xc3, 0x79,0x18,0x14,0xce, +0x48,0x32,0x2b,0xed, 0x43,0x3c,0x22,0xe0, 0x5e,0x2e,0x39,0xf7, 0x55,0x20,0x30,0xfa, +0x01,0xec,0x9a,0xb7, 0x0a,0xe2,0x93,0xba, 0x17,0xf0,0x88,0xad, 0x1c,0xfe,0x81,0xa0, +0x2d,0xd4,0xbe,0x83, 0x26,0xda,0xb7,0x8e, 0x3b,0xc8,0xac,0x99, 0x30,0xc6,0xa5,0x94, +0x59,0x9c,0xd2,0xdf, 0x52,0x92,0xdb,0xd2, 0x4f,0x80,0xc0,0xc5, 0x44,0x8e,0xc9,0xc8, +0x75,0xa4,0xf6,0xeb, 0x7e,0xaa,0xff,0xe6, 0x63,0xb8,0xe4,0xf1, 0x68,0xb6,0xed,0xfc, +0xb1,0x0c,0x0a,0x67, 0xba,0x02,0x03,0x6a, 0xa7,0x10,0x18,0x7d, 0xac,0x1e,0x11,0x70, +0x9d,0x34,0x2e,0x53, 0x96,0x3a,0x27,0x5e, 0x8b,0x28,0x3c,0x49, 0x80,0x26,0x35,0x44, +0xe9,0x7c,0x42,0x0f, 0xe2,0x72,0x4b,0x02, 0xff,0x60,0x50,0x15, 0xf4,0x6e,0x59,0x18, +0xc5,0x44,0x66,0x3b, 0xce,0x4a,0x6f,0x36, 0xd3,0x58,0x74,0x21, 0xd8,0x56,0x7d,0x2c, +0x7a,0x37,0xa1,0x0c, 0x71,0x39,0xa8,0x01, 0x6c,0x2b,0xb3,0x16, 0x67,0x25,0xba,0x1b, +0x56,0x0f,0x85,0x38, 0x5d,0x01,0x8c,0x35, 0x40,0x13,0x97,0x22, 0x4b,0x1d,0x9e,0x2f, +0x22,0x47,0xe9,0x64, 0x29,0x49,0xe0,0x69, 0x34,0x5b,0xfb,0x7e, 0x3f,0x55,0xf2,0x73, +0x0e,0x7f,0xcd,0x50, 0x05,0x71,0xc4,0x5d, 0x18,0x63,0xdf,0x4a, 0x13,0x6d,0xd6,0x47, +0xca,0xd7,0x31,0xdc, 0xc1,0xd9,0x38,0xd1, 0xdc,0xcb,0x23,0xc6, 0xd7,0xc5,0x2a,0xcb, +0xe6,0xef,0x15,0xe8, 0xed,0xe1,0x1c,0xe5, 0xf0,0xf3,0x07,0xf2, 0xfb,0xfd,0x0e,0xff, +0x92,0xa7,0x79,0xb4, 0x99,0xa9,0x70,0xb9, 0x84,0xbb,0x6b,0xae, 0x8f,0xb5,0x62,0xa3, +0xbe,0x9f,0x5d,0x80, 0xb5,0x91,0x54,0x8d, 0xa8,0x83,0x4f,0x9a, 0xa3,0x8d,0x46,0x97 +},{ +0x00,0x00,0x00,0x00, 0x0d,0x0b,0x0e,0x09, 0x1a,0x16,0x1c,0x12, 0x17,0x1d,0x12,0x1b, +0x34,0x2c,0x38,0x24, 0x39,0x27,0x36,0x2d, 0x2e,0x3a,0x24,0x36, 0x23,0x31,0x2a,0x3f, +0x68,0x58,0x70,0x48, 0x65,0x53,0x7e,0x41, 0x72,0x4e,0x6c,0x5a, 0x7f,0x45,0x62,0x53, +0x5c,0x74,0x48,0x6c, 0x51,0x7f,0x46,0x65, 
0x46,0x62,0x54,0x7e, 0x4b,0x69,0x5a,0x77, +0xd0,0xb0,0xe0,0x90, 0xdd,0xbb,0xee,0x99, 0xca,0xa6,0xfc,0x82, 0xc7,0xad,0xf2,0x8b, +0xe4,0x9c,0xd8,0xb4, 0xe9,0x97,0xd6,0xbd, 0xfe,0x8a,0xc4,0xa6, 0xf3,0x81,0xca,0xaf, +0xb8,0xe8,0x90,0xd8, 0xb5,0xe3,0x9e,0xd1, 0xa2,0xfe,0x8c,0xca, 0xaf,0xf5,0x82,0xc3, +0x8c,0xc4,0xa8,0xfc, 0x81,0xcf,0xa6,0xf5, 0x96,0xd2,0xb4,0xee, 0x9b,0xd9,0xba,0xe7, +0xbb,0x7b,0xdb,0x3b, 0xb6,0x70,0xd5,0x32, 0xa1,0x6d,0xc7,0x29, 0xac,0x66,0xc9,0x20, +0x8f,0x57,0xe3,0x1f, 0x82,0x5c,0xed,0x16, 0x95,0x41,0xff,0x0d, 0x98,0x4a,0xf1,0x04, +0xd3,0x23,0xab,0x73, 0xde,0x28,0xa5,0x7a, 0xc9,0x35,0xb7,0x61, 0xc4,0x3e,0xb9,0x68, +0xe7,0x0f,0x93,0x57, 0xea,0x04,0x9d,0x5e, 0xfd,0x19,0x8f,0x45, 0xf0,0x12,0x81,0x4c, +0x6b,0xcb,0x3b,0xab, 0x66,0xc0,0x35,0xa2, 0x71,0xdd,0x27,0xb9, 0x7c,0xd6,0x29,0xb0, +0x5f,0xe7,0x03,0x8f, 0x52,0xec,0x0d,0x86, 0x45,0xf1,0x1f,0x9d, 0x48,0xfa,0x11,0x94, +0x03,0x93,0x4b,0xe3, 0x0e,0x98,0x45,0xea, 0x19,0x85,0x57,0xf1, 0x14,0x8e,0x59,0xf8, +0x37,0xbf,0x73,0xc7, 0x3a,0xb4,0x7d,0xce, 0x2d,0xa9,0x6f,0xd5, 0x20,0xa2,0x61,0xdc, +0x6d,0xf6,0xad,0x76, 0x60,0xfd,0xa3,0x7f, 0x77,0xe0,0xb1,0x64, 0x7a,0xeb,0xbf,0x6d, +0x59,0xda,0x95,0x52, 0x54,0xd1,0x9b,0x5b, 0x43,0xcc,0x89,0x40, 0x4e,0xc7,0x87,0x49, +0x05,0xae,0xdd,0x3e, 0x08,0xa5,0xd3,0x37, 0x1f,0xb8,0xc1,0x2c, 0x12,0xb3,0xcf,0x25, +0x31,0x82,0xe5,0x1a, 0x3c,0x89,0xeb,0x13, 0x2b,0x94,0xf9,0x08, 0x26,0x9f,0xf7,0x01, +0xbd,0x46,0x4d,0xe6, 0xb0,0x4d,0x43,0xef, 0xa7,0x50,0x51,0xf4, 0xaa,0x5b,0x5f,0xfd, +0x89,0x6a,0x75,0xc2, 0x84,0x61,0x7b,0xcb, 0x93,0x7c,0x69,0xd0, 0x9e,0x77,0x67,0xd9, +0xd5,0x1e,0x3d,0xae, 0xd8,0x15,0x33,0xa7, 0xcf,0x08,0x21,0xbc, 0xc2,0x03,0x2f,0xb5, +0xe1,0x32,0x05,0x8a, 0xec,0x39,0x0b,0x83, 0xfb,0x24,0x19,0x98, 0xf6,0x2f,0x17,0x91, +0xd6,0x8d,0x76,0x4d, 0xdb,0x86,0x78,0x44, 0xcc,0x9b,0x6a,0x5f, 0xc1,0x90,0x64,0x56, +0xe2,0xa1,0x4e,0x69, 0xef,0xaa,0x40,0x60, 0xf8,0xb7,0x52,0x7b, 0xf5,0xbc,0x5c,0x72, +0xbe,0xd5,0x06,0x05, 0xb3,0xde,0x08,0x0c, 0xa4,0xc3,0x1a,0x17, 0xa9,0xc8,0x14,0x1e, 
+0x8a,0xf9,0x3e,0x21, 0x87,0xf2,0x30,0x28, 0x90,0xef,0x22,0x33, 0x9d,0xe4,0x2c,0x3a, +0x06,0x3d,0x96,0xdd, 0x0b,0x36,0x98,0xd4, 0x1c,0x2b,0x8a,0xcf, 0x11,0x20,0x84,0xc6, +0x32,0x11,0xae,0xf9, 0x3f,0x1a,0xa0,0xf0, 0x28,0x07,0xb2,0xeb, 0x25,0x0c,0xbc,0xe2, +0x6e,0x65,0xe6,0x95, 0x63,0x6e,0xe8,0x9c, 0x74,0x73,0xfa,0x87, 0x79,0x78,0xf4,0x8e, +0x5a,0x49,0xde,0xb1, 0x57,0x42,0xd0,0xb8, 0x40,0x5f,0xc2,0xa3, 0x4d,0x54,0xcc,0xaa, +0xda,0xf7,0x41,0xec, 0xd7,0xfc,0x4f,0xe5, 0xc0,0xe1,0x5d,0xfe, 0xcd,0xea,0x53,0xf7, +0xee,0xdb,0x79,0xc8, 0xe3,0xd0,0x77,0xc1, 0xf4,0xcd,0x65,0xda, 0xf9,0xc6,0x6b,0xd3, +0xb2,0xaf,0x31,0xa4, 0xbf,0xa4,0x3f,0xad, 0xa8,0xb9,0x2d,0xb6, 0xa5,0xb2,0x23,0xbf, +0x86,0x83,0x09,0x80, 0x8b,0x88,0x07,0x89, 0x9c,0x95,0x15,0x92, 0x91,0x9e,0x1b,0x9b, +0x0a,0x47,0xa1,0x7c, 0x07,0x4c,0xaf,0x75, 0x10,0x51,0xbd,0x6e, 0x1d,0x5a,0xb3,0x67, +0x3e,0x6b,0x99,0x58, 0x33,0x60,0x97,0x51, 0x24,0x7d,0x85,0x4a, 0x29,0x76,0x8b,0x43, +0x62,0x1f,0xd1,0x34, 0x6f,0x14,0xdf,0x3d, 0x78,0x09,0xcd,0x26, 0x75,0x02,0xc3,0x2f, +0x56,0x33,0xe9,0x10, 0x5b,0x38,0xe7,0x19, 0x4c,0x25,0xf5,0x02, 0x41,0x2e,0xfb,0x0b, +0x61,0x8c,0x9a,0xd7, 0x6c,0x87,0x94,0xde, 0x7b,0x9a,0x86,0xc5, 0x76,0x91,0x88,0xcc, +0x55,0xa0,0xa2,0xf3, 0x58,0xab,0xac,0xfa, 0x4f,0xb6,0xbe,0xe1, 0x42,0xbd,0xb0,0xe8, +0x09,0xd4,0xea,0x9f, 0x04,0xdf,0xe4,0x96, 0x13,0xc2,0xf6,0x8d, 0x1e,0xc9,0xf8,0x84, +0x3d,0xf8,0xd2,0xbb, 0x30,0xf3,0xdc,0xb2, 0x27,0xee,0xce,0xa9, 0x2a,0xe5,0xc0,0xa0, +0xb1,0x3c,0x7a,0x47, 0xbc,0x37,0x74,0x4e, 0xab,0x2a,0x66,0x55, 0xa6,0x21,0x68,0x5c, +0x85,0x10,0x42,0x63, 0x88,0x1b,0x4c,0x6a, 0x9f,0x06,0x5e,0x71, 0x92,0x0d,0x50,0x78, +0xd9,0x64,0x0a,0x0f, 0xd4,0x6f,0x04,0x06, 0xc3,0x72,0x16,0x1d, 0xce,0x79,0x18,0x14, +0xed,0x48,0x32,0x2b, 0xe0,0x43,0x3c,0x22, 0xf7,0x5e,0x2e,0x39, 0xfa,0x55,0x20,0x30, +0xb7,0x01,0xec,0x9a, 0xba,0x0a,0xe2,0x93, 0xad,0x17,0xf0,0x88, 0xa0,0x1c,0xfe,0x81, +0x83,0x2d,0xd4,0xbe, 0x8e,0x26,0xda,0xb7, 0x99,0x3b,0xc8,0xac, 0x94,0x30,0xc6,0xa5, +0xdf,0x59,0x9c,0xd2, 0xd2,0x52,0x92,0xdb, 
0xc5,0x4f,0x80,0xc0, 0xc8,0x44,0x8e,0xc9, +0xeb,0x75,0xa4,0xf6, 0xe6,0x7e,0xaa,0xff, 0xf1,0x63,0xb8,0xe4, 0xfc,0x68,0xb6,0xed, +0x67,0xb1,0x0c,0x0a, 0x6a,0xba,0x02,0x03, 0x7d,0xa7,0x10,0x18, 0x70,0xac,0x1e,0x11, +0x53,0x9d,0x34,0x2e, 0x5e,0x96,0x3a,0x27, 0x49,0x8b,0x28,0x3c, 0x44,0x80,0x26,0x35, +0x0f,0xe9,0x7c,0x42, 0x02,0xe2,0x72,0x4b, 0x15,0xff,0x60,0x50, 0x18,0xf4,0x6e,0x59, +0x3b,0xc5,0x44,0x66, 0x36,0xce,0x4a,0x6f, 0x21,0xd3,0x58,0x74, 0x2c,0xd8,0x56,0x7d, +0x0c,0x7a,0x37,0xa1, 0x01,0x71,0x39,0xa8, 0x16,0x6c,0x2b,0xb3, 0x1b,0x67,0x25,0xba, +0x38,0x56,0x0f,0x85, 0x35,0x5d,0x01,0x8c, 0x22,0x40,0x13,0x97, 0x2f,0x4b,0x1d,0x9e, +0x64,0x22,0x47,0xe9, 0x69,0x29,0x49,0xe0, 0x7e,0x34,0x5b,0xfb, 0x73,0x3f,0x55,0xf2, +0x50,0x0e,0x7f,0xcd, 0x5d,0x05,0x71,0xc4, 0x4a,0x18,0x63,0xdf, 0x47,0x13,0x6d,0xd6, +0xdc,0xca,0xd7,0x31, 0xd1,0xc1,0xd9,0x38, 0xc6,0xdc,0xcb,0x23, 0xcb,0xd7,0xc5,0x2a, +0xe8,0xe6,0xef,0x15, 0xe5,0xed,0xe1,0x1c, 0xf2,0xf0,0xf3,0x07, 0xff,0xfb,0xfd,0x0e, +0xb4,0x92,0xa7,0x79, 0xb9,0x99,0xa9,0x70, 0xae,0x84,0xbb,0x6b, 0xa3,0x8f,0xb5,0x62, +0x80,0xbe,0x9f,0x5d, 0x8d,0xb5,0x91,0x54, 0x9a,0xa8,0x83,0x4f, 0x97,0xa3,0x8d,0x46 +},{ +0x00,0x00,0x00,0x00, 0x09,0x0d,0x0b,0x0e, 0x12,0x1a,0x16,0x1c, 0x1b,0x17,0x1d,0x12, +0x24,0x34,0x2c,0x38, 0x2d,0x39,0x27,0x36, 0x36,0x2e,0x3a,0x24, 0x3f,0x23,0x31,0x2a, +0x48,0x68,0x58,0x70, 0x41,0x65,0x53,0x7e, 0x5a,0x72,0x4e,0x6c, 0x53,0x7f,0x45,0x62, +0x6c,0x5c,0x74,0x48, 0x65,0x51,0x7f,0x46, 0x7e,0x46,0x62,0x54, 0x77,0x4b,0x69,0x5a, +0x90,0xd0,0xb0,0xe0, 0x99,0xdd,0xbb,0xee, 0x82,0xca,0xa6,0xfc, 0x8b,0xc7,0xad,0xf2, +0xb4,0xe4,0x9c,0xd8, 0xbd,0xe9,0x97,0xd6, 0xa6,0xfe,0x8a,0xc4, 0xaf,0xf3,0x81,0xca, +0xd8,0xb8,0xe8,0x90, 0xd1,0xb5,0xe3,0x9e, 0xca,0xa2,0xfe,0x8c, 0xc3,0xaf,0xf5,0x82, +0xfc,0x8c,0xc4,0xa8, 0xf5,0x81,0xcf,0xa6, 0xee,0x96,0xd2,0xb4, 0xe7,0x9b,0xd9,0xba, +0x3b,0xbb,0x7b,0xdb, 0x32,0xb6,0x70,0xd5, 0x29,0xa1,0x6d,0xc7, 0x20,0xac,0x66,0xc9, +0x1f,0x8f,0x57,0xe3, 0x16,0x82,0x5c,0xed, 0x0d,0x95,0x41,0xff, 
0x04,0x98,0x4a,0xf1, +0x73,0xd3,0x23,0xab, 0x7a,0xde,0x28,0xa5, 0x61,0xc9,0x35,0xb7, 0x68,0xc4,0x3e,0xb9, +0x57,0xe7,0x0f,0x93, 0x5e,0xea,0x04,0x9d, 0x45,0xfd,0x19,0x8f, 0x4c,0xf0,0x12,0x81, +0xab,0x6b,0xcb,0x3b, 0xa2,0x66,0xc0,0x35, 0xb9,0x71,0xdd,0x27, 0xb0,0x7c,0xd6,0x29, +0x8f,0x5f,0xe7,0x03, 0x86,0x52,0xec,0x0d, 0x9d,0x45,0xf1,0x1f, 0x94,0x48,0xfa,0x11, +0xe3,0x03,0x93,0x4b, 0xea,0x0e,0x98,0x45, 0xf1,0x19,0x85,0x57, 0xf8,0x14,0x8e,0x59, +0xc7,0x37,0xbf,0x73, 0xce,0x3a,0xb4,0x7d, 0xd5,0x2d,0xa9,0x6f, 0xdc,0x20,0xa2,0x61, +0x76,0x6d,0xf6,0xad, 0x7f,0x60,0xfd,0xa3, 0x64,0x77,0xe0,0xb1, 0x6d,0x7a,0xeb,0xbf, +0x52,0x59,0xda,0x95, 0x5b,0x54,0xd1,0x9b, 0x40,0x43,0xcc,0x89, 0x49,0x4e,0xc7,0x87, +0x3e,0x05,0xae,0xdd, 0x37,0x08,0xa5,0xd3, 0x2c,0x1f,0xb8,0xc1, 0x25,0x12,0xb3,0xcf, +0x1a,0x31,0x82,0xe5, 0x13,0x3c,0x89,0xeb, 0x08,0x2b,0x94,0xf9, 0x01,0x26,0x9f,0xf7, +0xe6,0xbd,0x46,0x4d, 0xef,0xb0,0x4d,0x43, 0xf4,0xa7,0x50,0x51, 0xfd,0xaa,0x5b,0x5f, +0xc2,0x89,0x6a,0x75, 0xcb,0x84,0x61,0x7b, 0xd0,0x93,0x7c,0x69, 0xd9,0x9e,0x77,0x67, +0xae,0xd5,0x1e,0x3d, 0xa7,0xd8,0x15,0x33, 0xbc,0xcf,0x08,0x21, 0xb5,0xc2,0x03,0x2f, +0x8a,0xe1,0x32,0x05, 0x83,0xec,0x39,0x0b, 0x98,0xfb,0x24,0x19, 0x91,0xf6,0x2f,0x17, +0x4d,0xd6,0x8d,0x76, 0x44,0xdb,0x86,0x78, 0x5f,0xcc,0x9b,0x6a, 0x56,0xc1,0x90,0x64, +0x69,0xe2,0xa1,0x4e, 0x60,0xef,0xaa,0x40, 0x7b,0xf8,0xb7,0x52, 0x72,0xf5,0xbc,0x5c, +0x05,0xbe,0xd5,0x06, 0x0c,0xb3,0xde,0x08, 0x17,0xa4,0xc3,0x1a, 0x1e,0xa9,0xc8,0x14, +0x21,0x8a,0xf9,0x3e, 0x28,0x87,0xf2,0x30, 0x33,0x90,0xef,0x22, 0x3a,0x9d,0xe4,0x2c, +0xdd,0x06,0x3d,0x96, 0xd4,0x0b,0x36,0x98, 0xcf,0x1c,0x2b,0x8a, 0xc6,0x11,0x20,0x84, +0xf9,0x32,0x11,0xae, 0xf0,0x3f,0x1a,0xa0, 0xeb,0x28,0x07,0xb2, 0xe2,0x25,0x0c,0xbc, +0x95,0x6e,0x65,0xe6, 0x9c,0x63,0x6e,0xe8, 0x87,0x74,0x73,0xfa, 0x8e,0x79,0x78,0xf4, +0xb1,0x5a,0x49,0xde, 0xb8,0x57,0x42,0xd0, 0xa3,0x40,0x5f,0xc2, 0xaa,0x4d,0x54,0xcc, +0xec,0xda,0xf7,0x41, 0xe5,0xd7,0xfc,0x4f, 0xfe,0xc0,0xe1,0x5d, 0xf7,0xcd,0xea,0x53, +0xc8,0xee,0xdb,0x79, 
0xc1,0xe3,0xd0,0x77, 0xda,0xf4,0xcd,0x65, 0xd3,0xf9,0xc6,0x6b, +0xa4,0xb2,0xaf,0x31, 0xad,0xbf,0xa4,0x3f, 0xb6,0xa8,0xb9,0x2d, 0xbf,0xa5,0xb2,0x23, +0x80,0x86,0x83,0x09, 0x89,0x8b,0x88,0x07, 0x92,0x9c,0x95,0x15, 0x9b,0x91,0x9e,0x1b, +0x7c,0x0a,0x47,0xa1, 0x75,0x07,0x4c,0xaf, 0x6e,0x10,0x51,0xbd, 0x67,0x1d,0x5a,0xb3, +0x58,0x3e,0x6b,0x99, 0x51,0x33,0x60,0x97, 0x4a,0x24,0x7d,0x85, 0x43,0x29,0x76,0x8b, +0x34,0x62,0x1f,0xd1, 0x3d,0x6f,0x14,0xdf, 0x26,0x78,0x09,0xcd, 0x2f,0x75,0x02,0xc3, +0x10,0x56,0x33,0xe9, 0x19,0x5b,0x38,0xe7, 0x02,0x4c,0x25,0xf5, 0x0b,0x41,0x2e,0xfb, +0xd7,0x61,0x8c,0x9a, 0xde,0x6c,0x87,0x94, 0xc5,0x7b,0x9a,0x86, 0xcc,0x76,0x91,0x88, +0xf3,0x55,0xa0,0xa2, 0xfa,0x58,0xab,0xac, 0xe1,0x4f,0xb6,0xbe, 0xe8,0x42,0xbd,0xb0, +0x9f,0x09,0xd4,0xea, 0x96,0x04,0xdf,0xe4, 0x8d,0x13,0xc2,0xf6, 0x84,0x1e,0xc9,0xf8, +0xbb,0x3d,0xf8,0xd2, 0xb2,0x30,0xf3,0xdc, 0xa9,0x27,0xee,0xce, 0xa0,0x2a,0xe5,0xc0, +0x47,0xb1,0x3c,0x7a, 0x4e,0xbc,0x37,0x74, 0x55,0xab,0x2a,0x66, 0x5c,0xa6,0x21,0x68, +0x63,0x85,0x10,0x42, 0x6a,0x88,0x1b,0x4c, 0x71,0x9f,0x06,0x5e, 0x78,0x92,0x0d,0x50, +0x0f,0xd9,0x64,0x0a, 0x06,0xd4,0x6f,0x04, 0x1d,0xc3,0x72,0x16, 0x14,0xce,0x79,0x18, +0x2b,0xed,0x48,0x32, 0x22,0xe0,0x43,0x3c, 0x39,0xf7,0x5e,0x2e, 0x30,0xfa,0x55,0x20, +0x9a,0xb7,0x01,0xec, 0x93,0xba,0x0a,0xe2, 0x88,0xad,0x17,0xf0, 0x81,0xa0,0x1c,0xfe, +0xbe,0x83,0x2d,0xd4, 0xb7,0x8e,0x26,0xda, 0xac,0x99,0x3b,0xc8, 0xa5,0x94,0x30,0xc6, +0xd2,0xdf,0x59,0x9c, 0xdb,0xd2,0x52,0x92, 0xc0,0xc5,0x4f,0x80, 0xc9,0xc8,0x44,0x8e, +0xf6,0xeb,0x75,0xa4, 0xff,0xe6,0x7e,0xaa, 0xe4,0xf1,0x63,0xb8, 0xed,0xfc,0x68,0xb6, +0x0a,0x67,0xb1,0x0c, 0x03,0x6a,0xba,0x02, 0x18,0x7d,0xa7,0x10, 0x11,0x70,0xac,0x1e, +0x2e,0x53,0x9d,0x34, 0x27,0x5e,0x96,0x3a, 0x3c,0x49,0x8b,0x28, 0x35,0x44,0x80,0x26, +0x42,0x0f,0xe9,0x7c, 0x4b,0x02,0xe2,0x72, 0x50,0x15,0xff,0x60, 0x59,0x18,0xf4,0x6e, +0x66,0x3b,0xc5,0x44, 0x6f,0x36,0xce,0x4a, 0x74,0x21,0xd3,0x58, 0x7d,0x2c,0xd8,0x56, +0xa1,0x0c,0x7a,0x37, 0xa8,0x01,0x71,0x39, 0xb3,0x16,0x6c,0x2b, 
0xba,0x1b,0x67,0x25, +0x85,0x38,0x56,0x0f, 0x8c,0x35,0x5d,0x01, 0x97,0x22,0x40,0x13, 0x9e,0x2f,0x4b,0x1d, +0xe9,0x64,0x22,0x47, 0xe0,0x69,0x29,0x49, 0xfb,0x7e,0x34,0x5b, 0xf2,0x73,0x3f,0x55, +0xcd,0x50,0x0e,0x7f, 0xc4,0x5d,0x05,0x71, 0xdf,0x4a,0x18,0x63, 0xd6,0x47,0x13,0x6d, +0x31,0xdc,0xca,0xd7, 0x38,0xd1,0xc1,0xd9, 0x23,0xc6,0xdc,0xcb, 0x2a,0xcb,0xd7,0xc5, +0x15,0xe8,0xe6,0xef, 0x1c,0xe5,0xed,0xe1, 0x07,0xf2,0xf0,0xf3, 0x0e,0xff,0xfb,0xfd, +0x79,0xb4,0x92,0xa7, 0x70,0xb9,0x99,0xa9, 0x6b,0xae,0x84,0xbb, 0x62,0xa3,0x8f,0xb5, +0x5d,0x80,0xbe,0x9f, 0x54,0x8d,0xb5,0x91, 0x4f,0x9a,0xa8,0x83, 0x46,0x97,0xa3,0x8d +}}; + +/* +// +// The AES S-box values can be retrieved from the AesSboxMatrixMult table. +// S[x] = AesSboxMatrixMult[0][x][1]. +// We save codespace by not having a separate table for the S-box. +// This trick doesn't work for the inverse S-box as the +// Inverse MDS matrix does not have a coefficient equal to 1. +// + +SYMCRYPT_ALIGN_AT(256) const BYTE SymCryptAesSbox[256] = { + 99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103, 43, 254, 215, 171, 118, +202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175, 156, 164, 114, 192, +183, 253, 147, 38, 54, 63, 247, 204, 52, 165, 229, 241, 113, 216, 49, 21, + 4, 199, 35, 195, 24, 150, 5, 154, 7, 18, 128, 226, 235, 39, 178, 117, + 9, 131, 44, 26, 27, 110, 90, 160, 82, 59, 214, 179, 41, 227, 47, 132, + 83, 209, 0, 237, 32, 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207, +208, 239, 170, 251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168, + 81, 163, 64, 143, 146, 157, 56, 245, 188, 182, 218, 33, 16, 255, 243, 210, +205, 12, 19, 236, 95, 151, 68, 23, 196, 167, 126, 61, 100, 93, 25, 115, + 96, 129, 79, 220, 34, 42, 144, 136, 70, 238, 184, 20, 222, 94, 11, 219, +224, 50, 58, 10, 73, 6, 36, 92, 194, 211, 172, 98, 145, 149, 228, 121, +231, 200, 55, 109, 141, 213, 78, 169, 108, 86, 244, 234, 101, 122, 174, 8, +186, 120, 37, 46, 28, 166, 180, 198, 232, 221, 116, 31, 75, 189, 139, 138, +112, 62, 181, 102, 72, 3, 246, 
14, 97, 53, 87, 185, 134, 193, 29, 158, +225, 248, 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40, 223, +140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, 187, 22 +}; +*/ + +SYMCRYPT_ALIGN_AT(256) const BYTE SymCryptAesInvSbox[256] = { // For final round in decryption +0x52,0x09,0x6a,0xd5, +0x30,0x36,0xa5,0x38, +0xbf,0x40,0xa3,0x9e, +0x81,0xf3,0xd7,0xfb, +0x7c,0xe3,0x39,0x82, +0x9b,0x2f,0xff,0x87, +0x34,0x8e,0x43,0x44, +0xc4,0xde,0xe9,0xcb, +0x54,0x7b,0x94,0x32, +0xa6,0xc2,0x23,0x3d, +0xee,0x4c,0x95,0x0b, +0x42,0xfa,0xc3,0x4e, +0x08,0x2e,0xa1,0x66, +0x28,0xd9,0x24,0xb2, +0x76,0x5b,0xa2,0x49, +0x6d,0x8b,0xd1,0x25, +0x72,0xf8,0xf6,0x64, +0x86,0x68,0x98,0x16, +0xd4,0xa4,0x5c,0xcc, +0x5d,0x65,0xb6,0x92, +0x6c,0x70,0x48,0x50, +0xfd,0xed,0xb9,0xda, +0x5e,0x15,0x46,0x57, +0xa7,0x8d,0x9d,0x84, +0x90,0xd8,0xab,0x00, +0x8c,0xbc,0xd3,0x0a, +0xf7,0xe4,0x58,0x05, +0xb8,0xb3,0x45,0x06, +0xd0,0x2c,0x1e,0x8f, +0xca,0x3f,0x0f,0x02, +0xc1,0xaf,0xbd,0x03, +0x01,0x13,0x8a,0x6b, +0x3a,0x91,0x11,0x41, +0x4f,0x67,0xdc,0xea, +0x97,0xf2,0xcf,0xce, +0xf0,0xb4,0xe6,0x73, +0x96,0xac,0x74,0x22, +0xe7,0xad,0x35,0x85, +0xe2,0xf9,0x37,0xe8, +0x1c,0x75,0xdf,0x6e, +0x47,0xf1,0x1a,0x71, +0x1d,0x29,0xc5,0x89, +0x6f,0xb7,0x62,0x0e, +0xaa,0x18,0xbe,0x1b, +0xfc,0x56,0x3e,0x4b, +0xc6,0xd2,0x79,0x20, +0x9a,0xdb,0xc0,0xfe, +0x78,0xcd,0x5a,0xf4, +0x1f,0xdd,0xa8,0x33, +0x88,0x07,0xc7,0x31, +0xb1,0x12,0x10,0x59, +0x27,0x80,0xec,0x5f, +0x60,0x51,0x7f,0xa9, +0x19,0xb5,0x4a,0x0d, +0x2d,0xe5,0x7a,0x9f, +0x93,0xc9,0x9c,0xef, +0xa0,0xe0,0x3b,0x4d, +0xae,0x2a,0xf5,0xb0, +0xc8,0xeb,0xbb,0x3c, +0x83,0x53,0x99,0x61, +0x17,0x2b,0x04,0x7e, +0xba,0x77,0xd6,0x26, +0xe1,0x69,0x14,0x63, +0x55,0x21,0x0c,0x7d +}; diff --git a/libs/symcrypt/lib/DesTables.c b/libs/symcrypt/lib/DesTables.c new file mode 100644 index 00000000000..d39375b8ad5 --- /dev/null +++ b/libs/symcrypt/lib/DesTables.c @@ -0,0 +1,280 @@ +// +// DesTables.c static lookup tables for DES +// +// Copyright (c) Microsoft Corporation. 
Licensed under the MIT license. +// +// +// These tables were copies +// from the RSA32 DES implementation. See 3des.c for details. +// + +#include "precomp.h" + +// +// Alignments are chosen to reduce side-channel attacks through the TLB cache. +// We align each table to a multiple of the size within which we do data-dependent +// lookups. For example, the table below is aligned to 256. It is not a secret +// that the 8 sub-tables are accessed, but which value inside each sub-table is a secret. +// Aligning to 256 still leaves the data cache line leakage, but avoids any TLB-related leakage. +// +SYMCRYPT_ALIGN_AT( 256 ) const UINT32 SymCryptDesSpbox[8][64] = { +0x02080800,0x00080000,0x02000002,0x02080802, +0x02000000,0x00080802,0x00080002,0x02000002, +0x00080802,0x02080800,0x02080000,0x00000802, +0x02000802,0x02000000,0x00000000,0x00080002, +0x00080000,0x00000002,0x02000800,0x00080800, +0x02080802,0x02080000,0x00000802,0x02000800, +0x00000002,0x00000800,0x00080800,0x02080002, +0x00000800,0x02000802,0x02080002,0x00000000, +0x00000000,0x02080802,0x02000800,0x00080002, +0x02080800,0x00080000,0x00000802,0x02000800, +0x02080002,0x00000800,0x00080800,0x02000002, +0x00080802,0x00000002,0x02000002,0x02080000, +0x02080802,0x00080800,0x02080000,0x02000802, +0x02000000,0x00000802,0x00080002,0x00000000, +0x00080000,0x02000000,0x02000802,0x02080800, +0x00000002,0x02080002,0x00000800,0x00080802, +0x40108010,0x00000000,0x00108000,0x40100000, +0x40000010,0x00008010,0x40008000,0x00108000, +0x00008000,0x40100010,0x00000010,0x40008000, +0x00100010,0x40108000,0x40100000,0x00000010, +0x00100000,0x40008010,0x40100010,0x00008000, +0x00108010,0x40000000,0x00000000,0x00100010, +0x40008010,0x00108010,0x40108000,0x40000010, +0x40000000,0x00100000,0x00008010,0x40108010, +0x00100010,0x40108000,0x40008000,0x00108010, +0x40108010,0x00100010,0x40000010,0x00000000, +0x40000000,0x00008010,0x00100000,0x40100010, +0x00008000,0x40000000,0x00108010,0x40008010, 
+0x40108000,0x00008000,0x00000000,0x40000010, +0x00000010,0x40108010,0x00108000,0x40100000, +0x40100010,0x00100000,0x00008010,0x40008000, +0x40008010,0x00000010,0x40100000,0x00108000, +0x04000001,0x04040100,0x00000100,0x04000101, +0x00040001,0x04000000,0x04000101,0x00040100, +0x04000100,0x00040000,0x04040000,0x00000001, +0x04040101,0x00000101,0x00000001,0x04040001, +0x00000000,0x00040001,0x04040100,0x00000100, +0x00000101,0x04040101,0x00040000,0x04000001, +0x04040001,0x04000100,0x00040101,0x04040000, +0x00040100,0x00000000,0x04000000,0x00040101, +0x04040100,0x00000100,0x00000001,0x00040000, +0x00000101,0x00040001,0x04040000,0x04000101, +0x00000000,0x04040100,0x00040100,0x04040001, +0x00040001,0x04000000,0x04040101,0x00000001, +0x00040101,0x04000001,0x04000000,0x04040101, +0x00040000,0x04000100,0x04000101,0x00040100, +0x04000100,0x00000000,0x04040001,0x00000101, +0x04000001,0x00040101,0x00000100,0x04040000, +0x00401008,0x10001000,0x00000008,0x10401008, +0x00000000,0x10400000,0x10001008,0x00400008, +0x10401000,0x10000008,0x10000000,0x00001008, +0x10000008,0x00401008,0x00400000,0x10000000, +0x10400008,0x00401000,0x00001000,0x00000008, +0x00401000,0x10001008,0x10400000,0x00001000, +0x00001008,0x00000000,0x00400008,0x10401000, +0x10001000,0x10400008,0x10401008,0x00400000, +0x10400008,0x00001008,0x00400000,0x10000008, +0x00401000,0x10001000,0x00000008,0x10400000, +0x10001008,0x00000000,0x00001000,0x00400008, +0x00000000,0x10400008,0x10401000,0x00001000, +0x10000000,0x10401008,0x00401008,0x00400000, +0x10401008,0x00000008,0x10001000,0x00401008, +0x00400008,0x00401000,0x10400000,0x10001008, +0x00001008,0x10000000,0x10000008,0x10401000, +0x08000000,0x00010000,0x00000400,0x08010420, +0x08010020,0x08000400,0x00010420,0x08010000, +0x00010000,0x00000020,0x08000020,0x00010400, +0x08000420,0x08010020,0x08010400,0x00000000, +0x00010400,0x08000000,0x00010020,0x00000420, +0x08000400,0x00010420,0x00000000,0x08000020, +0x00000020,0x08000420,0x08010420,0x00010020, 
+0x08010000,0x00000400,0x00000420,0x08010400, +0x08010400,0x08000420,0x00010020,0x08010000, +0x00010000,0x00000020,0x08000020,0x08000400, +0x08000000,0x00010400,0x08010420,0x00000000, +0x00010420,0x08000000,0x00000400,0x00010020, +0x08000420,0x00000400,0x00000000,0x08010420, +0x08010020,0x08010400,0x00000420,0x00010000, +0x00010400,0x08010020,0x08000400,0x00000420, +0x00000020,0x00010420,0x08010000,0x08000020, +0x80000040,0x00200040,0x00000000,0x80202000, +0x00200040,0x00002000,0x80002040,0x00200000, +0x00002040,0x80202040,0x00202000,0x80000000, +0x80002000,0x80000040,0x80200000,0x00202040, +0x00200000,0x80002040,0x80200040,0x00000000, +0x00002000,0x00000040,0x80202000,0x80200040, +0x80202040,0x80200000,0x80000000,0x00002040, +0x00000040,0x00202000,0x00202040,0x80002000, +0x00002040,0x80000000,0x80002000,0x00202040, +0x80202000,0x00200040,0x00000000,0x80002000, +0x80000000,0x00002000,0x80200040,0x00200000, +0x00200040,0x80202040,0x00202000,0x00000040, +0x80202040,0x00202000,0x00200000,0x80002040, +0x80000040,0x80200000,0x00202040,0x00000000, +0x00002000,0x80000040,0x80002040,0x80202000, +0x80200000,0x00002040,0x00000040,0x80200040, +0x00004000,0x00000200,0x01000200,0x01000004, +0x01004204,0x00004004,0x00004200,0x00000000, +0x01000000,0x01000204,0x00000204,0x01004000, +0x00000004,0x01004200,0x01004000,0x00000204, +0x01000204,0x00004000,0x00004004,0x01004204, +0x00000000,0x01000200,0x01000004,0x00004200, +0x01004004,0x00004204,0x01004200,0x00000004, +0x00004204,0x01004004,0x00000200,0x01000000, +0x00004204,0x01004000,0x01004004,0x00000204, +0x00004000,0x00000200,0x01000000,0x01004004, +0x01000204,0x00004204,0x00004200,0x00000000, +0x00000200,0x01000004,0x00000004,0x01000200, +0x00000000,0x01000204,0x01000200,0x00004200, +0x00000204,0x00004000,0x01004204,0x01000000, +0x01004200,0x00000004,0x00004004,0x01004204, +0x01000004,0x01004200,0x01004000,0x00004004, +0x20800080,0x20820000,0x00020080,0x00000000, +0x20020000,0x00800080,0x20800000,0x20820080, 
+0x00000080,0x20000000,0x00820000,0x00020080, +0x00820080,0x20020080,0x20000080,0x20800000, +0x00020000,0x00820080,0x00800080,0x20020000, +0x20820080,0x20000080,0x00000000,0x00820000, +0x20000000,0x00800000,0x20020080,0x20800080, +0x00800000,0x00020000,0x20820000,0x00000080, +0x00800000,0x00020000,0x20000080,0x20820080, +0x00020080,0x20000000,0x00000000,0x00820000, +0x20800080,0x20020080,0x20020000,0x00800080, +0x20820000,0x00000080,0x00800080,0x20020000, +0x20820080,0x00800000,0x20800000,0x20000080, +0x00820000,0x00020080,0x20020080,0x20800000, +0x00000080,0x20820000,0x00820080,0x00000000, +0x20000000,0x20800080,0x00020000,0x00820080, +}; + +SYMCRYPT_ALIGN_AT(256) const UINT32 SymCryptDesKeySelect[8][64]={ +0x00000000,0x00000010,0x20000000,0x20000010, +0x00010000,0x00010010,0x20010000,0x20010010, +0x00000800,0x00000810,0x20000800,0x20000810, +0x00010800,0x00010810,0x20010800,0x20010810, +0x00000020,0x00000030,0x20000020,0x20000030, +0x00010020,0x00010030,0x20010020,0x20010030, +0x00000820,0x00000830,0x20000820,0x20000830, +0x00010820,0x00010830,0x20010820,0x20010830, +0x00080000,0x00080010,0x20080000,0x20080010, +0x00090000,0x00090010,0x20090000,0x20090010, +0x00080800,0x00080810,0x20080800,0x20080810, +0x00090800,0x00090810,0x20090800,0x20090810, +0x00080020,0x00080030,0x20080020,0x20080030, +0x00090020,0x00090030,0x20090020,0x20090030, +0x00080820,0x00080830,0x20080820,0x20080830, +0x00090820,0x00090830,0x20090820,0x20090830, +0x00000000,0x02000000,0x00002000,0x02002000, +0x00200000,0x02200000,0x00202000,0x02202000, +0x00000004,0x02000004,0x00002004,0x02002004, +0x00200004,0x02200004,0x00202004,0x02202004, +0x00000400,0x02000400,0x00002400,0x02002400, +0x00200400,0x02200400,0x00202400,0x02202400, +0x00000404,0x02000404,0x00002404,0x02002404, +0x00200404,0x02200404,0x00202404,0x02202404, +0x10000000,0x12000000,0x10002000,0x12002000, +0x10200000,0x12200000,0x10202000,0x12202000, +0x10000004,0x12000004,0x10002004,0x12002004, 
+0x10200004,0x12200004,0x10202004,0x12202004, +0x10000400,0x12000400,0x10002400,0x12002400, +0x10200400,0x12200400,0x10202400,0x12202400, +0x10000404,0x12000404,0x10002404,0x12002404, +0x10200404,0x12200404,0x10202404,0x12202404, +0x00000000,0x00000001,0x00040000,0x00040001, +0x01000000,0x01000001,0x01040000,0x01040001, +0x00000002,0x00000003,0x00040002,0x00040003, +0x01000002,0x01000003,0x01040002,0x01040003, +0x00000200,0x00000201,0x00040200,0x00040201, +0x01000200,0x01000201,0x01040200,0x01040201, +0x00000202,0x00000203,0x00040202,0x00040203, +0x01000202,0x01000203,0x01040202,0x01040203, +0x08000000,0x08000001,0x08040000,0x08040001, +0x09000000,0x09000001,0x09040000,0x09040001, +0x08000002,0x08000003,0x08040002,0x08040003, +0x09000002,0x09000003,0x09040002,0x09040003, +0x08000200,0x08000201,0x08040200,0x08040201, +0x09000200,0x09000201,0x09040200,0x09040201, +0x08000202,0x08000203,0x08040202,0x08040203, +0x09000202,0x09000203,0x09040202,0x09040203, +0x00000000,0x00100000,0x00000100,0x00100100, +0x00000008,0x00100008,0x00000108,0x00100108, +0x00001000,0x00101000,0x00001100,0x00101100, +0x00001008,0x00101008,0x00001108,0x00101108, +0x04000000,0x04100000,0x04000100,0x04100100, +0x04000008,0x04100008,0x04000108,0x04100108, +0x04001000,0x04101000,0x04001100,0x04101100, +0x04001008,0x04101008,0x04001108,0x04101108, +0x00020000,0x00120000,0x00020100,0x00120100, +0x00020008,0x00120008,0x00020108,0x00120108, +0x00021000,0x00121000,0x00021100,0x00121100, +0x00021008,0x00121008,0x00021108,0x00121108, +0x04020000,0x04120000,0x04020100,0x04120100, +0x04020008,0x04120008,0x04020108,0x04120108, +0x04021000,0x04121000,0x04021100,0x04121100, +0x04021008,0x04121008,0x04021108,0x04121108, +0x00000000,0x10000000,0x00010000,0x10010000, +0x00000004,0x10000004,0x00010004,0x10010004, +0x20000000,0x30000000,0x20010000,0x30010000, +0x20000004,0x30000004,0x20010004,0x30010004, +0x00100000,0x10100000,0x00110000,0x10110000, +0x00100004,0x10100004,0x00110004,0x10110004, 
+0x20100000,0x30100000,0x20110000,0x30110000, +0x20100004,0x30100004,0x20110004,0x30110004, +0x00001000,0x10001000,0x00011000,0x10011000, +0x00001004,0x10001004,0x00011004,0x10011004, +0x20001000,0x30001000,0x20011000,0x30011000, +0x20001004,0x30001004,0x20011004,0x30011004, +0x00101000,0x10101000,0x00111000,0x10111000, +0x00101004,0x10101004,0x00111004,0x10111004, +0x20101000,0x30101000,0x20111000,0x30111000, +0x20101004,0x30101004,0x20111004,0x30111004, +0x00000000,0x08000000,0x00000008,0x08000008, +0x00000400,0x08000400,0x00000408,0x08000408, +0x00020000,0x08020000,0x00020008,0x08020008, +0x00020400,0x08020400,0x00020408,0x08020408, +0x00000001,0x08000001,0x00000009,0x08000009, +0x00000401,0x08000401,0x00000409,0x08000409, +0x00020001,0x08020001,0x00020009,0x08020009, +0x00020401,0x08020401,0x00020409,0x08020409, +0x02000000,0x0A000000,0x02000008,0x0A000008, +0x02000400,0x0A000400,0x02000408,0x0A000408, +0x02020000,0x0A020000,0x02020008,0x0A020008, +0x02020400,0x0A020400,0x02020408,0x0A020408, +0x02000001,0x0A000001,0x02000009,0x0A000009, +0x02000401,0x0A000401,0x02000409,0x0A000409, +0x02020001,0x0A020001,0x02020009,0x0A020009, +0x02020401,0x0A020401,0x02020409,0x0A020409, +0x00000000,0x00000100,0x00080000,0x00080100, +0x01000000,0x01000100,0x01080000,0x01080100, +0x00000010,0x00000110,0x00080010,0x00080110, +0x01000010,0x01000110,0x01080010,0x01080110, +0x00200000,0x00200100,0x00280000,0x00280100, +0x01200000,0x01200100,0x01280000,0x01280100, +0x00200010,0x00200110,0x00280010,0x00280110, +0x01200010,0x01200110,0x01280010,0x01280110, +0x00000200,0x00000300,0x00080200,0x00080300, +0x01000200,0x01000300,0x01080200,0x01080300, +0x00000210,0x00000310,0x00080210,0x00080310, +0x01000210,0x01000310,0x01080210,0x01080310, +0x00200200,0x00200300,0x00280200,0x00280300, +0x01200200,0x01200300,0x01280200,0x01280300, +0x00200210,0x00200310,0x00280210,0x00280310, +0x01200210,0x01200310,0x01280210,0x01280310, +0x00000000,0x04000000,0x00040000,0x04040000, 
+0x00000002,0x04000002,0x00040002,0x04040002, +0x00002000,0x04002000,0x00042000,0x04042000, +0x00002002,0x04002002,0x00042002,0x04042002, +0x00000020,0x04000020,0x00040020,0x04040020, +0x00000022,0x04000022,0x00040022,0x04040022, +0x00002020,0x04002020,0x00042020,0x04042020, +0x00002022,0x04002022,0x00042022,0x04042022, +0x00000800,0x04000800,0x00040800,0x04040800, +0x00000802,0x04000802,0x00040802,0x04040802, +0x00002800,0x04002800,0x00042800,0x04042800, +0x00002802,0x04002802,0x00042802,0x04042802, +0x00000820,0x04000820,0x00040820,0x04040820, +0x00000822,0x04000822,0x00040822,0x04040822, +0x00002820,0x04002820,0x00042820,0x04042820, +0x00002822,0x04002822,0x00042822,0x04042822, +}; diff --git a/libs/symcrypt/lib/FatalIntercept.c b/libs/symcrypt/lib/FatalIntercept.c new file mode 100644 index 00000000000..b0e11e527c0 --- /dev/null +++ b/libs/symcrypt/lib/FatalIntercept.c @@ -0,0 +1,23 @@ +// +// FatalIntercept.C +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// Empty function which our test code can replace to intercept any Fatal calls. +// Used in Kernel-mode tests so that an error doesn't bugcheck the machine. +// Rather, it can kill the current thread and not take down the machine. +// +// This is in its own C file so that it is only linked in when the caller doesn't have +// a function by this name. +// + +#include "precomp.h" + +VOID +SYMCRYPT_CALL +SymCryptFatalIntercept( UINT32 fatalCode ) +{ + UNREFERENCED_PARAMETER( fatalCode ); +} diff --git a/libs/symcrypt/lib/IEEE802_11SaeCustom.c b/libs/symcrypt/lib/IEEE802_11SaeCustom.c new file mode 100644 index 00000000000..574b53400c4 --- /dev/null +++ b/libs/symcrypt/lib/IEEE802_11SaeCustom.c @@ -0,0 +1,1585 @@ +// +// IEEE802_11SaeCustom.c Implementation of the custom crypto of IEEE 802.11 SAE +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// +// + +#include "precomp.h" + +// Used in SAE Hunting and Pecking methods where NIST P256 is hardcoded +#define PRIME_LENGTH_BITS 256 + +// +// This data structure stores, for one IANA group number, the associated elliptic curve parameters, +// the MAC algorithm and the z value, +// for the elliptic curves defined in the IEEE Std 802.11 SAE method. +// +typedef struct _SYMCRYPT_SAE_GROUP_DATA { + SYMCRYPT_802_11_SAE_GROUP group; + const PCSYMCRYPT_ECURVE_PARAMS *pCurveParams; + const PCSYMCRYPT_MAC *macAlgorithm; + INT32 z; +} SYMCRYPT_SAE_GROUP_DATA, *PSYMCRYPT_SAE_GROUP_DATA; + +typedef const SYMCRYPT_SAE_GROUP_DATA* PCSYMCRYPT_SAE_GROUP_DATA; + +// +// Data based on IEEE Std 802.11-2020 +// Table 12.1 - Hash algorithm based on length of prime +// Table 12.2 - Unique curve parameter +// +const SYMCRYPT_SAE_GROUP_DATA g_ianaData[] = { + { SYMCRYPT_SAE_GROUP_19, &SymCryptEcurveParamsNistP256, &SymCryptHmacSha256Algorithm, -10}, + { SYMCRYPT_SAE_GROUP_20, &SymCryptEcurveParamsNistP384, &SymCryptHmacSha384Algorithm, -12}, +}; + +// +// Helper function that finds the IANA group data entry for a given group number. +// Searches the global variable g_ianaData, where the data for the supported groups is stored, +// and returns NULL if ianaGroup has no entry there. +// +PCSYMCRYPT_SAE_GROUP_DATA SymCryptSaeFindGroupData(SYMCRYPT_802_11_SAE_GROUP ianaGroup) +{ + for (UINT32 index = 0; index < SYMCRYPT_ARRAY_SIZE(g_ianaData); index++ ) + { + if ( g_ianaData[index].group == ianaGroup ) + { + return &g_ianaData[index]; + } + } + + return NULL; +} + +// +// Helper function that returns the sizes of the field elements and elliptic curve points in bytes +// for a given IANA group number. Both output parameters are optional.
+// If the group is not supported, both sizes are reported as 0. +// +VOID SymCrypt802_11SaeGetGroupSizes( + SYMCRYPT_802_11_SAE_GROUP group, + _Out_opt_ SIZE_T* pcbScalar, + _Out_opt_ SIZE_T* pcbPoint ) +{ + PCSYMCRYPT_SAE_GROUP_DATA pGroupData = NULL; + SIZE_T cbScalar = 0; + SIZE_T cbPoint = 0; + + pGroupData = SymCryptSaeFindGroupData( group ); + + if ( pGroupData != NULL ) + { + cbScalar = ( *( pGroupData->pCurveParams ) )->cbFieldLength; + cbPoint = 2 * cbScalar; + } + + if ( pcbScalar != NULL ) + { + *pcbScalar = cbScalar; + } + + if ( pcbPoint != NULL ) + { + *pcbPoint = cbPoint; + } +} + +// +// Calculate sqrt(peVal) if it exists. If so, *puIsQuadraticResidue is set to 0xFFFF`FFFF. +// Otherwise, *puIsQuadraticResidue is set to 0. +// WARNING: *peSqrtArg is set even if the square root doesn't exist. Use masked copy functions +// with *puIsQuadraticResidue so as to use the value of *peSqrtArg only if the square root exists. +// +// - pmMod: Modulus of the curve. Must equal 3 mod 4, which holds for all NIST Prime curves except P224 +// - peVal: Value to calculate the square root of +// - puIsQuadraticResidue: mask value, true if sqrt(peVal) exists, false otherwise +// - peSqrtArg: optional out argument for square root value +// - pbScratch, cbScratch: scratch space >= SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( pmMod->nDigits ) +// +// Returns SYMCRYPT_MEMORY_ALLOCATION_FAILURE if one of the temporaries cannot be allocated; in that case +// neither *puIsQuadraticResidue nor *peSqrtArg is written, so callers must check the return value. +// +SYMCRYPT_ERROR +SymCryptModSqrt( + _In_ PSYMCRYPT_MODULUS pmMod, + _In_ PSYMCRYPT_MODELEMENT peVal, + _Out_ PUINT32 puIsQuadraticResidue, + _Out_opt_ PSYMCRYPT_MODELEMENT peSqrtArg, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PSYMCRYPT_INT piTmp = SymCryptIntAllocate( SymCryptDigitsFromBits( pmMod->Divisor.nBits ) ); + PSYMCRYPT_MODELEMENT peSqrt = SymCryptModElementAllocate( pmMod ); + PSYMCRYPT_MODELEMENT peTmp = SymCryptModElementAllocate( pmMod ); + + if( piTmp == NULL || peSqrt == NULL || peTmp == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Sqrt( v ) = v^{(P+1)/4} mod P when P
= 3 mod 4 as it is here + SymCryptIntCopy( SymCryptIntFromModulus( pmMod ), piTmp ); + SymCryptIntAddUint32( piTmp, 1, piTmp ); // No overflow as our prime is not 2^256 - 1 + + SYMCRYPT_ASSERT( (SymCryptIntGetValueLsbits32(piTmp) & 3) == 0); + SymCryptIntDivPow2(piTmp, 2, piTmp); + // piTmp = (P+1)/4 + + // Compute Sqrt( v ) if it exists + SymCryptModExp( pmMod, peVal, piTmp, pmMod->Divisor.nBits - 2, 0, peSqrt, pbScratch, cbScratch ); + + SymCryptModSquare( pmMod, peSqrt, peTmp, pbScratch, cbScratch ); + *puIsQuadraticResidue = SymCryptModElementIsEqual( pmMod, peTmp, peVal ); + + if( peSqrtArg != NULL ) + { + SymCryptModElementCopy( pmMod, peSqrt, peSqrtArg ); + } + +cleanup: + + if( piTmp != NULL ) + { + SymCryptIntFree( piTmp ); + piTmp = NULL; + } + + if( peSqrt != NULL ) + { + SymCryptModElementFree( pmMod, peSqrt ); + peSqrt = NULL; + } + + if( peTmp != NULL ) + { + SymCryptModElementFree( pmMod, peTmp ); + peTmp = NULL; + } + + return scError; + +} + +// +// Calculates SSWU( u ) as described in 12.4.4.2.3 +// +// - pCurve: The curve object to use. +// - z: z value used in the SSWU calculation. Currently we assume this value to be negative. +// - peU: Value to calculate SSWU of. +// - poP: point on the curve found by SSWU. +// - pbScratch, cbScratch: scratch space >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) +// +SYMCRYPT_ERROR +SymCryptSswu( + _In_ PSYMCRYPT_ECURVE pCurve, + _In_ INT32 z, + _In_ PSYMCRYPT_MODELEMENT peU, + _Out_ PSYMCRYPT_ECPOINT poP, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + UINT32 selectionMask = 0; // Mask variable for masked copy operations.
"l" in the spec + + PSYMCRYPT_INT piTmp = NULL; + PSYMCRYPT_MODELEMENT peTmp = NULL; + + PSYMCRYPT_MODELEMENT peZ = NULL; + PSYMCRYPT_MODELEMENT peM = NULL; + PSYMCRYPT_MODELEMENT peT = NULL; + PSYMCRYPT_MODELEMENT peX1 = NULL; + PSYMCRYPT_MODELEMENT peX2 = NULL; + PSYMCRYPT_MODELEMENT peGX1 = NULL; + PSYMCRYPT_MODELEMENT peGX2 = NULL; + + BYTE pointBuf[SYMCRYPT_SAE_MAX_EC_POINT_SIZE_BYTES] = { 0 }; + + SYMCRYPT_ASSERT( z < 0 ); + + piTmp = SymCryptIntAllocate( SymCryptDigitsFromBits( pCurve->FModBitsize ) ); + + peTmp = SymCryptModElementAllocate( pCurve->FMod ); + peZ = SymCryptModElementAllocate( pCurve->FMod ); + peM = SymCryptModElementAllocate( pCurve->FMod ); + peT = SymCryptModElementAllocate( pCurve->FMod ); + peX1 = SymCryptModElementAllocate( pCurve->FMod ); + peX2 = SymCryptModElementAllocate( pCurve->FMod ); + peGX1 = SymCryptModElementAllocate( pCurve->FMod ); + peGX2 = SymCryptModElementAllocate( pCurve->FMod ); + + if( piTmp == NULL|| peTmp == NULL || peZ == NULL || peM == NULL || peT == NULL || + peX1 == NULL || peX2 == NULL || peGX1 == NULL || peGX2 == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Convert z to mod element + // Currently we avoid a branching based on the sign of z to make the assignment and assume it will + // be negative which holds for the set of possible values as of now (NIST P256 and NIST P384). 
+ // There is no direct function to create a SYMCRYPT_INT from a signed INT32, so when z is negative + // we change its sign and call SymCryptModElementSetValueNegUInt32 + SymCryptModElementSetValueNegUint32(-z, pCurve->FMod, peZ, pbScratch, cbScratch); + + // Set peTmp to 1 for convenience later + SymCryptModElementSetValueUint32( 1, pCurve->FMod, peTmp, pbScratch, cbScratch ); + + // m = ( z^2 * u^4 + z * u^2 ) = (z * u^2)(z * u^2 + 1) modulo p + SymCryptModSquare( pCurve->FMod, peU, peM, pbScratch, cbScratch ); // M = u^2 + SymCryptModMul( pCurve->FMod, peM, peZ, peM, pbScratch, cbScratch ); // M = z * u^2 + SymCryptModAdd( pCurve->FMod, peM, peTmp, peTmp, pbScratch, cbScratch ); // tmp = (z * u^2 + 1) + SymCryptModMul( pCurve->FMod, peM, peTmp, peM, pbScratch, cbScratch ); // M = M * tmp = (z * u^2)(z * u^2 + 1) + + // l = CEQ( m, 0 ) + selectionMask = SymCryptModElementIsZero( pCurve->FMod, peM ); + + // t = inverse( m ) where inverse ( m ) = m^( p-2 ) modulo p + SymCryptIntSubUint32( SymCryptIntFromModulus( pCurve->FMod ), 2, piTmp ); + SymCryptModExp( pCurve->FMod, peM, piTmp, pCurve->FModBitsize, 0, peT, pbScratch, cbScratch ); + + //x1 = CSEL( l, ( b / ( z * a ) modulo p ), ( ( - b / a ) * ( 1 + t ) ) modulo p ) + // where CSEL(x,y,z) operates in constant time and returns y if x is true and z otherwise. 
+ SymCryptModMul( pCurve->FMod, peZ, pCurve->A, peTmp, pbScratch, cbScratch ); // tmp = z * a + SymCryptModInv( pCurve->FMod, peTmp, peTmp, SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, pbScratch, cbScratch ); // tmp = 1/(z * a) + SymCryptModMul( pCurve->FMod, pCurve->B, peTmp, peX1, pbScratch, cbScratch ); // x1A = B * 1/(z * a) + + SymCryptModInv( pCurve->FMod, pCurve->A, peTmp, SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, pbScratch, cbScratch ); // tmp = 1/a + SymCryptModMul( pCurve->FMod, pCurve->B, peTmp, peTmp, pbScratch, cbScratch ); // tmp = b * 1/a + SymCryptModNeg( pCurve->FMod, peTmp, peTmp, pbScratch, cbScratch ); // tmp = -(b * 1/a) + + // NB: in this block we're using X2 as the second candidate for CSEL. This allows us to choose the + // correct X1 by copying X2 to X1 if l is false + SymCryptIntSetValueUint32( 1, piTmp ); + SymCryptIntToModElement( piTmp, pCurve->FMod, peX2, pbScratch, cbScratch ); // X1B = 1 + SymCryptModAdd( pCurve->FMod, peX2, peT, peX2, pbScratch, cbScratch ); // X1B = 1 + t + SymCryptModMul( pCurve->FMod, peX2, peTmp, peX2, pbScratch, cbScratch ); // X1B = -(b * 1/a)(1 + t) + + // Note: we need the binary complement of l since MaskedCopy copies only if the mask is 0xFFFFFFFF, + // and we want the second X1 candidate iff l is false + SymCryptModElementMaskedCopy( pCurve->FMod, peX2, peX1, ~selectionMask ); + + // gx1 = ( x1^3 + a * x1 + b ) = (x1^2 + a)*x1 + b modulo p + SymCryptModSquare( pCurve->FMod, peX1, peGX1, pbScratch, cbScratch ); // gx1 = x1^2 + SymCryptModAdd( pCurve->FMod, peGX1, pCurve->A, peGX1, pbScratch, cbScratch ); // gx1 = x1^2 + a + SymCryptModMul( pCurve->FMod, peGX1, peX1, peGX1, pbScratch, cbScratch ); // gx1 = (x1^2 + a)*x1 + SymCryptModAdd( pCurve->FMod, peGX1, pCurve->B, peGX1, pbScratch, cbScratch ); // gx1 = (x1^2 + a)*x1 + b + + //x2 = ( z * u^2 * x1 ) modulo p + SymCryptModSquare( pCurve->FMod, peU, peX2, pbScratch, cbScratch ); // x2 = u^2 + SymCryptModMul( pCurve->FMod, peX2, 
peZ, peX2, pbScratch, cbScratch ); // x2 = u^2 * z + SymCryptModMul( pCurve->FMod, peX2, peX1, peX2, pbScratch, cbScratch ); // x2 = u^2 * z * x1 + + //gx2 = ( x2^3 + a * x2 + b ) = (x2^2 + a)*x2 + b modulo p + SymCryptModSquare( pCurve->FMod, peX2, peGX2, pbScratch, cbScratch ); // gx2 = x2^2 + SymCryptModAdd( pCurve->FMod, peGX2, pCurve->A, peGX2, pbScratch, cbScratch ); // gx2 = x2^2 + a + SymCryptModMul( pCurve->FMod, peGX2, peX2, peGX2, pbScratch, cbScratch ); // gx2 = (x2^2 + a)*x2 + SymCryptModAdd( pCurve->FMod, peGX2, pCurve->B, peGX2, pbScratch, cbScratch ); // gx2 = (x2^2 + a)*x2 + b + + //l = gx1 is a quadratic residue modulo p + scError = SymCryptModSqrt( pCurve->FMod, peGX1, &selectionMask, NULL, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // v = CSEL( l, gx1, gx2 ) + // (Using gx1 as a temporary for v) + SymCryptModElementMaskedCopy( pCurve->FMod, peGX2, peGX1, ~selectionMask ); + + // x = CSEL( l, x1, x2 ) + // (Using x1 as a temporary for x) + SymCryptModElementMaskedCopy( pCurve->FMod, peX2, peX1, ~selectionMask ); + + // y = sqrt( v ) = v^{(P+1)/4} + // (Using gx1 as a temporary for y) + scError = SymCryptModSqrt( pCurve->FMod, peGX1, &selectionMask, peGX1, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + // SymCryptModSqrt failed before writing its outputs, so gx1 still holds v rather than y. + // Bail out instead of building a point from it and losing the error code below. + goto cleanup; + } + + // l = CEQ( LSB( u ), LSB( y ) ) + // LSB returns the least significant *BIT* of its argument + SymCryptModElementToInt( pCurve->FMod, peU, piTmp, pbScratch, cbScratch ); + UINT32 u = SymCryptIntGetValueLsbits32( piTmp ); + + SymCryptModElementToInt( pCurve->FMod, peGX1, piTmp, pbScratch, cbScratch ); + UINT32 y = SymCryptIntGetValueLsbits32( piTmp ); + + selectionMask = SYMCRYPT_MASK32_EQ( u & 1, y & 1 ); + + // P = CSEL( l, ( x, y ), ( x, p - y ) ) + // equivalently, y = CSEL( l, y, p - y ) + // (p - y) mod p is equivalent to -y mod p, so we end up with + // y = CSEL(l, y, -y) + // We use gx1 for y + SymCryptModNeg( pCurve->FMod, peGX1, peTmp, pbScratch, cbScratch ); + SymCryptModElementMaskedCopy( pCurve->FMod, peTmp,
peGX1, ~selectionMask ); + + SymCryptModElementGetValue( pCurve->FMod, peX1, &pointBuf[0], pCurve->FModBytesize, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pbScratch, cbScratch ); + SymCryptModElementGetValue( pCurve->FMod, peGX1, &pointBuf[pCurve->FModBytesize], pCurve->FModBytesize, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pbScratch, cbScratch ); + + scError = SymCryptEcpointSetValue( pCurve, + pointBuf, + 2 * pCurve->FModBytesize, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + SYMCRYPT_ECPOINT_FORMAT_XY, + poP, + 0, + pbScratch, + cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + + if( peGX2 != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peGX2 ); + peGX2 = NULL; + } + + if( peGX1 != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peGX1 ); + peGX1 = NULL; + } + + if( peX2 != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peX2 ); + peX2 = NULL; + } + + if( peX1 != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peX1 ); + peX1 = NULL; + } + + if( peT != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peT ); + peT = NULL; + } + + if( peM != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peM ); + peM = NULL; + } + + if( peZ != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peZ ); + peZ = NULL; + } + + if( peTmp != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peTmp ); + peTmp = NULL; + } + + if( piTmp != NULL ) + { + SymCryptIntFree( piTmp ); + piTmp = NULL; + } + + return scError; +} + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomSetRandMask( + _Inout_ PSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _Inout_updates_opt_( cbRand ) PBYTE pbRand, + SIZE_T cbRand, + _Inout_updates_opt_( cbMask) PBYTE pbMask, + SIZE_T cbMask, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PCSYMCRYPT_ECURVE pcCurve = pState->pCurve; + + SymCryptModElementSetValueUint32( 0, pcCurve->GOrd, pState->peRand, pbScratch, cbScratch ); + if( pbRand != NULL ) + { + scError = 
SymCryptModElementSetValue( pbRand, cbRand, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pcCurve->GOrd, pState->peRand, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + if( SymCryptModElementIsZero( pcCurve->GOrd, pState->peRand ) ) + { + SymCryptModSetRandom( pcCurve->GOrd, pState->peRand, SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE, pbScratch, cbScratch ); + } + + if( pbRand != NULL ) + { + scError = SymCryptModElementGetValue( pcCurve->GOrd, pState->peRand, pbRand, cbRand, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + SymCryptModElementSetValueUint32( 0, pcCurve->GOrd, pState->peMask, pbScratch, cbScratch ); + if( pbMask != NULL ) + { + scError = SymCryptModElementSetValue( pbMask, cbMask, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pcCurve->GOrd, pState->peMask, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + if( SymCryptModElementIsZero( pcCurve->GOrd, pState->peMask ) ) + { + SymCryptModSetRandom( pcCurve->GOrd, pState->peMask, SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE, pbScratch, cbScratch ); + } + + if( pbMask != NULL ) + { + scError = SymCryptModElementGetValue( pcCurve->GOrd, pState->peMask, pbMask, cbMask, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + // + // The standard calls for checking that peRand and peMask are not 0 or 1, and peRand + peMask is not 0 or 1. + // When the caller specifies the values we don't want to do any checking as they might be helpful in test vectors. + // When this code generates the random values, we avoid 0 or 1 (by not passing the flags allowing 0 and 1). 
+ // We don't check that peRand + peMask > 1 because the probability of that occurring randomly is about 2^{-254} so the + // risk of this happening on any machine ever in the world is much smaller than the risk associated with adding several lines of code. + // Note that a caller-supplied value that parses to zero is not kept: like a missing (NULL) buffer it is replaced + // by a freshly generated random value, which is then written back to the caller's buffer. + // + +cleanup: + + return scError; +} + +// +// Initializes *pState for SAE with the hunting-and-pecking method on the NIST P256 curve with HMAC-SHA256. +// The password element (pState->poPWE) is derived from the two 6-byte MAC addresses and the password, and +// the rand/mask values are then set up through SymCrypt802_11SaeCustomSetRandMask (32-byte buffers). +// If pbCounter is not NULL it receives the counter value for which the password element was found. +// On any error the partially initialized state is destroyed before the error is returned. +// +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomInit( + _Out_ PSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _In_reads_( 6 ) PCBYTE pbMac1, + _In_reads_( 6 ) PCBYTE pbMac2, + _In_reads_( cbPassword ) PCBYTE pbPassword, + SIZE_T cbPassword, + _Out_opt_ PBYTE pbCounter, + _Inout_updates_opt_( 32 ) PBYTE pbRand, + _Inout_updates_opt_( 32 ) PBYTE pbMask ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + BYTE counter; + UINT32 notFoundMask; + UINT32 solutionMask; + UINT32 negMask; + BYTE abSeed[SYMCRYPT_HMAC_SHA256_RESULT_SIZE]; + BYTE abValue[SYMCRYPT_HMAC_SHA256_RESULT_SIZE]; + BYTE abSeedKey[16]; // Need only 12, but the extra bytes make the code easier. + SYMCRYPT_HMAC_SHA256_EXPANDED_KEY hmacSeedKey; + SYMCRYPT_HMAC_SHA256_EXPANDED_KEY hmacValueKey; + SYMCRYPT_HMAC_SHA256_STATE hmacState; + BYTE abTmp[2]; + BYTE pointBuf[ 64 ]; + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + UINT64 minMac; + UINT64 maxMac; + + UINT32 nDigits; + PSYMCRYPT_ECURVE pCurve; // Only a cache, pState->pCurve owns the allocation + PSYMCRYPT_INT piTmp = NULL; + PSYMCRYPT_MODELEMENT peX = NULL; + PSYMCRYPT_MODELEMENT peY = NULL; + PSYMCRYPT_MODELEMENT peCubic = NULL; + PSYMCRYPT_MODELEMENT peTmp = NULL; + PSYMCRYPT_ECPOINT poPWECandidate = NULL; + + // Set state to 0 so that our pointers have valid values. + SymCryptWipe( pState, sizeof( *pState ) ); + + // Per IEEE 802.11-2016 section 12.4.4.1 the mandatory-to-implement curve is + // number 19 from the IANA Group description for RFC 2409 (IKE) + // The IANA website maps this to a 256-bit Random ECP group in RFC 5903. + // RFC 5903 specifies this group to be identical to the NIST P256 curve.
+ pCurve = SymCryptEcurveAllocate( SymCryptEcurveParamsNistP256, 0 ); + pState->pCurve = pCurve; + if( pCurve == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pState->macAlgorithm = SymCryptHmacSha256Algorithm; + + pState->peRand = SymCryptModElementAllocate( pCurve->GOrd ); + if( pState->peRand == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pState->peMask = SymCryptModElementAllocate( pCurve->GOrd ); + if( pState->peMask == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pState->poPWE = SymCryptEcpointAllocate( pCurve ); + if( pState->poPWE == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + nDigits = SymCryptDigitsFromBits( PRIME_LENGTH_BITS ); + + cbScratch = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( nDigits ), + SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) ) ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + + piTmp = SymCryptIntAllocate( nDigits ); + peX = SymCryptModElementAllocate( pCurve->FMod ); + peY = SymCryptModElementAllocate( pCurve->FMod ); + peCubic = SymCryptModElementAllocate( pCurve->FMod ); + peTmp = SymCryptModElementAllocate( pCurve->FMod ); + poPWECandidate = SymCryptEcpointAllocate( pCurve ); + + if( pbScratch == NULL || piTmp == NULL || peX == NULL || peY == NULL || peCubic == NULL || peTmp == NULL || poPWECandidate == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + SymCryptWipeKnownSize( abSeedKey, sizeof( abSeedKey ) ); + memcpy( &abSeedKey[0], pbMac1, 6 ); + minMac = SYMCRYPT_LOAD_MSBFIRST64( abSeedKey ); + memcpy( &abSeedKey[0], pbMac2, 6 ); + maxMac = SYMCRYPT_LOAD_MSBFIRST64( abSeedKey ); + + if( minMac > maxMac ) + { + // MAC values are public, no side-channel issues with this if() + // Swap the two values + minMac ^= maxMac; + maxMac ^= minMac; + 
minMac ^= maxMac; + } + + // Now we write the two MACs into the buffer, larger MAC first. + // Note the slight overlap, and the use of 14 bytes rather than 12 + SYMCRYPT_STORE_MSBFIRST64( &abSeedKey[0], maxMac ); + SYMCRYPT_STORE_MSBFIRST64( &abSeedKey[6], minMac ); // This writes abSeedKey[6..13]; only the first 12 bytes are used as key + + SymCryptHmacSha256ExpandKey( &hmacSeedKey, abSeedKey, 12 ); + SymCryptWipeKnownSize( abSeedKey, sizeof( abSeedKey ) ); // Not strictly speaking a secret, but good general hygiene + + notFoundMask = (UINT32)-1; + counter = 0; + + // We exit the loop only after 40 or more iterations + // This greatly reduces the side-channel of how often we run this loop. + while( notFoundMask != 0 || counter < 40 ) + { + counter += 1; + // counter is a BYTE: reaching 0 here means it wrapped around without a point having been found + if( counter == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // pwd-seed = Hmac-sha256( max(Mac1, Mac2) || min(Mac1, Mac2) , Password || counter ) + SymCryptHmacSha256Init( &hmacState, &hmacSeedKey ); + SymCryptHmacSha256Append( &hmacState, pbPassword, cbPassword ); + SymCryptHmacSha256Append( &hmacState, &counter, 1 ); + SymCryptHmacSha256Result( &hmacState, abSeed ); + + // pwd-value + SymCryptHmacSha256ExpandKey( &hmacValueKey, abSeed, sizeof( abSeed ) ); + SymCryptHmacSha256Init( &hmacState, &hmacValueKey ); + + SYMCRYPT_STORE_LSBFIRST16( abTmp, 1 ); + SymCryptHmacSha256Append( &hmacState, abTmp, 2 ); // i value = 1 + // Spec is unclear on whether there should be a terminating 0 on the context. + // There are 23 characters in the string; we pass len=23, so the terminating zero is NOT included (len=24 would include it) + SymCryptHmacSha256Append( &hmacState, (PCBYTE) "SAE Hunting and Pecking", 23 ); + + // Pick up the byte representation of p from the parameters + SymCryptHmacSha256Append( &hmacState, (BYTE *)(SymCryptEcurveParamsNistP256 + 1), 32 ); + + SYMCRYPT_STORE_LSBFIRST16( abTmp, 256 ); + SymCryptHmacSha256Append( &hmacState, abTmp, 2 ); // Length value = 256 + SymCryptHmacSha256Result( &hmacState, abValue ); + + // Get the pwd-value into an integer + scError = SymCryptIntSetValue( abValue, sizeof(
abValue ), SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piTmp ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Check that it is less than P + if( !SymCryptIntIsLessThan( piTmp, SymCryptIntFromModulus( pCurve->FMod ) ) ) + { + // This is a slight side-channel, but our prime P starts with FFFFFFFF so the probability of + // hitting this case is < 2^-32. + continue; + } + + // Compute x^3 + A*x + B + SymCryptIntToModElement( piTmp, pCurve->FMod, peX, pbScratch, cbScratch ); + SymCryptModSquare( pCurve->FMod, peX, peCubic, pbScratch, cbScratch ); + SymCryptModAdd( pCurve->FMod, peCubic, pCurve->A, peCubic, pbScratch, cbScratch ); + SymCryptModMul( pCurve->FMod, peCubic, peX, peCubic, pbScratch, cbScratch ); + SymCryptModAdd( pCurve->FMod, peCubic, pCurve->B, peCubic, pbScratch, cbScratch ); + + // Get the quadratic residue of (x^3 + A*x + B) modulo P if it exists + scError = SymCryptModSqrt( pCurve->FMod, peCubic, &solutionMask, peY, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + solutionMask &= notFoundMask; + + // Pick Y or -Y according to the LSbits + SymCryptModElementToInt( pCurve->FMod, peY, piTmp, pbScratch, cbScratch ); + SymCryptModNeg( pCurve->FMod, peY, peTmp, pbScratch, cbScratch ); + + negMask = 0 - ((abSeed[ sizeof( abSeed ) - 1 ] ^ SymCryptIntGetValueLsbits32( piTmp ) ) & 1); + SymCryptModElementMaskedCopy( pCurve->FMod, peTmp, peY, negMask ); + + SymCryptModElementGetValue( pCurve->FMod, peX, &pointBuf[ 0], 32, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pbScratch, cbScratch ); + SymCryptModElementGetValue( pCurve->FMod, peY, &pointBuf[32], 32, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pbScratch, cbScratch ); + scError = SymCryptEcpointSetValue( pCurve, + pointBuf, + sizeof( pointBuf ), + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + SYMCRYPT_ECPOINT_FORMAT_XY, + poPWECandidate, + 0, + pbScratch, + cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptEcpointMaskedCopy( pCurve, poPWECandidate, 
pState->poPWE, solutionMask ); + pState->counter |= (BYTE)(counter & solutionMask); + + notFoundMask &= ~solutionMask; + } + + scError = SymCrypt802_11SaeCustomSetRandMask( pState, pbRand, 32, pbMask, 32, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + if( pbCounter != NULL ) + { + *pbCounter = pState->counter; + } + +cleanup: + + SymCryptWipe( &hmacSeedKey, sizeof( hmacSeedKey ) ); + SymCryptWipe( &hmacValueKey, sizeof( hmacValueKey ) ); + SymCryptWipe( abSeed, sizeof( abSeed ) ); + SymCryptWipe( abValue, sizeof( abValue ) ); + SymCryptWipe( pointBuf, sizeof( pointBuf ) ); + + if( piTmp != NULL ) + { + SymCryptIntFree( piTmp ); + piTmp = NULL; + } + + if( peX != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peX ); + peX = NULL; + } + + if( peY != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peY ); + peY = NULL; + } + + if( peCubic != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peCubic ); + peCubic = NULL; + } + + if( peTmp != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peTmp ); + peTmp = NULL; + } + + if( poPWECandidate != NULL ) + { + SymCryptEcpointFree( pCurve, poPWECandidate ); + poPWECandidate = NULL; + } + + if( scError != SYMCRYPT_NO_ERROR ) + { + SymCrypt802_11SaeCustomDestroy( pState ); + } + + if( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + pbScratch = NULL; + } + + return scError; +} + + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCreatePTGeneric( + SYMCRYPT_802_11_SAE_GROUP group, + _In_reads_( cbSsid ) PCBYTE pbSsid, + SIZE_T cbSsid, + _In_reads_( cbPassword ) PCBYTE pbPassword, + SIZE_T cbPassword, + _In_reads_opt_( cbPasswordIdentifier ) PCBYTE pbPasswordIdentifier, + SIZE_T cbPasswordIdentifier, + _Out_writes_( cbPT ) PBYTE pbPT, + SIZE_T cbPT) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SIZE_T cbIkm = 0; + SIZE_T cbScratch = 0; + + PBYTE pbPwdValue = NULL; + UINT32 cbPwdValue = 0; + PBYTE pbScratch = NULL; + 
SYMCRYPT_HKDF_EXPANDED_KEY hkdfKey; + + PSYMCRYPT_ECURVE pCurve = NULL; + PCSYMCRYPT_MAC pMacAlgorithm = NULL; + PSYMCRYPT_INT piU1 = NULL; + PSYMCRYPT_INT piU2 = NULL; + PSYMCRYPT_MODELEMENT peU1 = NULL; + PSYMCRYPT_MODELEMENT peU2 = NULL; + + PSYMCRYPT_ECPOINT poP1 = NULL; + PSYMCRYPT_ECPOINT poP2 = NULL; + PSYMCRYPT_ECPOINT poPT = NULL; + + PCSYMCRYPT_SAE_GROUP_DATA pGroupData = NULL; + + + pGroupData = SymCryptSaeFindGroupData( group ); + + // Provided IANA group number must match one of the supported groups + if ( pGroupData == NULL) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Construct the objects associated with the IANA group number + pCurve = SymCryptEcurveAllocate( *( pGroupData->pCurveParams), 0 ); + if( pCurve == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pMacAlgorithm = *( pGroupData->macAlgorithm ); + + const UINT32 nDigits = SymCryptEcurveDigitsofFieldElement( pCurve ); + + cbIkm = cbPassword + cbPasswordIdentifier; + cbScratch = SYMCRYPT_MAX( cbIkm, + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( nDigits ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ), + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ) ) ) ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + + // len = olen( p ) + floor( olen( p ) / 2 ) + cbPwdValue = SYMCRYPT_BYTES_FROM_BITS(pCurve->FModBitsize) + SYMCRYPT_BYTES_FROM_BITS(pCurve->FModBitsize) / 2; + + pbPwdValue = SymCryptCallbackAlloc( cbPwdValue ); + + piU1 = SymCryptIntAllocate( SymCryptDigitsFromBits( cbPwdValue * 8 ) ); + piU2 = SymCryptIntAllocate( SymCryptDigitsFromBits( cbPwdValue * 8 ) ); + peU1 = SymCryptModElementAllocate( pCurve->FMod ); + peU2 = SymCryptModElementAllocate( pCurve->FMod ); + + poP1 = SymCryptEcpointAllocate( pCurve ); + poP2 = SymCryptEcpointAllocate( pCurve ); + poPT = SymCryptEcpointAllocate( pCurve ); 
+ + if( pbScratch == NULL || pbPwdValue == NULL || piU1 == NULL || piU2 == NULL || + peU1 == NULL || peU2 == NULL || poP1 == NULL || poP2 == NULL || poPT == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // pwd-seed = HKDF-Extract( ssid, password [|| identifier] ) + // Note that SymCryptHkdfExpandKey corresponds to HKDF-Extract + memcpy( pbScratch, pbPassword, cbPassword ); + if( pbPasswordIdentifier ) + { + memcpy( pbScratch + cbPassword, pbPasswordIdentifier, cbPasswordIdentifier ); + } + + scError = SymCryptHkdfExpandKey( &hkdfKey, pMacAlgorithm, pbScratch, cbIkm, pbSsid, cbSsid ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // pwd-value = HKDF-Expand( pwd-seed, "SAE Hash to Element u1 P1", len ) + // Note that SymCryptHkdf derive corresponds to HKDF-Expand + // Salt does not include a null terminator, so the length is 25 chars + scError = SymCryptHkdfDerive( &hkdfKey, (PCBYTE) "SAE Hash to Element u1 P1", 25, pbPwdValue, cbPwdValue ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // u1 = pwd-value modulo p + scError = SymCryptIntSetValue( pbPwdValue, cbPwdValue, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piU1 ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptIntToModElement( piU1, pCurve->FMod, peU1, pbScratch, cbScratch ); + + // P1 = SSWU( u1 ) + SymCryptSswu( pCurve, pGroupData->z, peU1, poP1, pbScratch, cbScratch ); + + // pwd-value = HKDF-Expand( pwd-seed, "SAE Hash to Element u2 P2", len ) + scError = SymCryptHkdfDerive( &hkdfKey, (PCBYTE) "SAE Hash to Element u2 P2", 25, pbPwdValue, cbPwdValue ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // u2 = pwd-value modulo p + scError = SymCryptIntSetValue( pbPwdValue, cbPwdValue, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piU2 ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptIntToModElement( piU2, pCurve->FMod, peU2, pbScratch, cbScratch ); + + // P2 = SSWU( u2 ) + scError = 
SymCryptSswu( pCurve, pGroupData->z, peU2, poP2, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // PT = P1 + P2 + SymCryptEcpointAdd( pCurve, poP1, poP2, poPT, 0, pbScratch, cbScratch ); + + scError = SymCryptEcpointGetValue( pCurve, + poPT, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + SYMCRYPT_ECPOINT_FORMAT_XY, + pbPT, + cbPT, + 0, + pbScratch, + cbScratch ); + SYMCRYPT_ASSERT( scError == SYMCRYPT_NO_ERROR ); + +cleanup: + + if( poP2 != NULL ) + { + SymCryptEcpointFree( pCurve, poP2 ); + poP2 = NULL; + } + + if( poP1 != NULL ) + { + SymCryptEcpointFree( pCurve, poP1 ); + poP1 = NULL; + } + + if( poPT != NULL ) + { + SymCryptEcpointFree( pCurve, poPT ); + poPT = NULL; + } + + if( peU2 != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peU2 ); + peU2 = NULL; + } + + if( peU1 != NULL ) + { + SymCryptModElementFree( pCurve->FMod, peU1 ); + peU1 = NULL; + } + + if( piU2 != NULL ) + { + SymCryptIntFree( piU2 ); + piU2 = NULL; + } + + if( piU1 != NULL ) + { + SymCryptIntFree( piU1 ); + piU1 = NULL; + } + + if( pbPwdValue != NULL ) + { + SymCryptWipe( pbPwdValue, cbPwdValue ); + SymCryptCallbackFree( pbPwdValue ); + pbPwdValue = NULL; + } + + if( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + pbScratch = NULL; + } + + if ( pCurve != NULL ) + { + SymCryptEcurveFree( pCurve ); + pCurve = NULL; + } + + return scError; +} + + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCreatePT( + _In_reads_( cbSsid ) PCBYTE pbSsid, + SIZE_T cbSsid, + _In_reads_( cbPassword ) PCBYTE pbPassword, + SIZE_T cbPassword, + _In_reads_opt_( cbPasswordIdentifier ) PCBYTE pbPasswordIdentifier, + SIZE_T cbPasswordIdentifier, + _Out_writes_( 64 ) PBYTE pbPT ) +{ + return SymCrypt802_11SaeCustomCreatePTGeneric( SYMCRYPT_SAE_GROUP_19, + pbSsid, + cbSsid, + pbPassword, + cbPassword, + pbPasswordIdentifier, + cbPasswordIdentifier, + pbPT, + 64 ); +} + + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomInitH2EGeneric( + _Out_ 
//
// SymCrypt802_11SaeCustomInitH2EGeneric
//
// Initializes pState for the Hash-to-Element variant of SAE:
//  - zeroes *pState, then allocates the curve for 'group' plus the rand, mask and PWE
//    objects, all of which are stored in (and from then on owned by) pState;
//  - computes val = HMAC( 0^n, max(MacA,MacB) || min(MacA,MacB) ), maps it to
//    val modulo (q - 1) + 1, and sets PWE = val * PT;
//  - hands pbRand/pbMask to SymCrypt802_11SaeCustomSetRandMask.
//
// On any error the state is torn down with SymCrypt802_11SaeCustomDestroy before
// returning, so the caller has nothing to release.
//
// Returns SYMCRYPT_INVALID_ARGUMENT for an unknown group,
// SYMCRYPT_MEMORY_ALLOCATION_FAILURE when an allocation fails, or the error of the
// first failing primitive.
//
SYMCRYPT_ERROR
SymCrypt802_11SaeCustomInitH2EGeneric(
    _Out_                           PSYMCRYPT_802_11_SAE_CUSTOM_STATE   pState,
                                    SYMCRYPT_802_11_SAE_GROUP           group,
    _In_reads_( cbPT )              PCBYTE                              pbPT,
                                    SIZE_T                              cbPT,
    _In_reads_( 6 )                 PCBYTE                              pbMacA,
    _In_reads_( 6 )                 PCBYTE                              pbMacB,
    _Inout_updates_opt_( cbRand )   PBYTE                               pbRand,
                                    SIZE_T                              cbRand,
    _Inout_updates_opt_( cbMask )   PBYTE                               pbMask,
                                    SIZE_T                              cbMask)
{
    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;

    BYTE hmacKeyBytes[SYMCRYPT_SAE_MAX_HMAC_OUTPUT_SIZE_BYTES] = { 0 };
    BYTE valBytes[SYMCRYPT_SAE_MAX_HMAC_OUTPUT_SIZE_BYTES] = { 0 };
    BYTE macBuffer[16] = { 0 };     // Need only 12, but the extra bytes make the code easier.
    SYMCRYPT_MAC_EXPANDED_KEY hmacKey = { 0 };
    SYMCRYPT_MAC_STATE hmacState = { 0 };

    SIZE_T cbScratch = 0;
    PBYTE pbScratch = NULL;

    UINT64 minMac = 0;
    UINT64 maxMac = 0;

    UINT32 nDigits = 0;

    PSYMCRYPT_INT piTmp = NULL;
    PSYMCRYPT_MODULUS pmMod = NULL;
    PSYMCRYPT_MODELEMENT peVal = NULL;
    PSYMCRYPT_MODELEMENT peTmp = NULL;
    PSYMCRYPT_ECPOINT poPT = NULL;
    PCSYMCRYPT_SAE_GROUP_DATA pGroupData = NULL;
    PCSYMCRYPT_MAC pMacAlgorithm = NULL;

    // Set state to 0 so that our pointers have valid values.
    // This also makes the error-path call to Destroy safe before anything is allocated.
    SymCryptWipeKnownSize( pState, sizeof( *pState ) );

    PSYMCRYPT_ECURVE pCurve = NULL; // Weak reference; curve is owned by pState

    pGroupData = SymCryptSaeFindGroupData( group );

    // Provided IANA group number must match one of the supported groups
    if ( pGroupData == NULL )
    {
        scError = SYMCRYPT_INVALID_ARGUMENT;
        goto cleanup;
    }

    // Construct the objects associated with the IANA group number
    pCurve = SymCryptEcurveAllocate( *( pGroupData->pCurveParams ), 0 );
    if ( pCurve == NULL )
    {
        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
        goto cleanup;
    }

    // From here on the curve is released through pState (see Destroy), not directly.
    pState->pCurve = pCurve;

    pMacAlgorithm = *( pGroupData->macAlgorithm );

    SIZE_T cbHMACOutputSize = pMacAlgorithm->resultSize;

    pState->peRand = SymCryptModElementAllocate( pCurve->GOrd );
    if( pState->peRand == NULL )
    {
        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
        goto cleanup;
    }

    pState->peMask = SymCryptModElementAllocate( pCurve->GOrd );
    if( pState->peMask == NULL )
    {
        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
        goto cleanup;
    }

    pState->poPWE = SymCryptEcpointAllocate( pCurve );
    if( pState->poPWE == NULL )
    {
        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
        goto cleanup;
    }

    nDigits = SymCryptDigitsFromBits( pCurve->GOrdBitsize );

    // Temporaries local to this call; all are released at cleanup.
    piTmp = SymCryptIntAllocate( nDigits );
    pmMod = SymCryptModulusAllocate( nDigits );
    poPT = SymCryptEcpointAllocate( pCurve );

    cbScratch = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ),
                SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ),
                SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ),
                              SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS ( pCurve, 1 ) ) ) );
    pbScratch = SymCryptCallbackAlloc( cbScratch );

    if( piTmp == NULL || pmMod == NULL || poPT == NULL || pbScratch == NULL )
    {
        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
        goto cleanup;
    }

    // Load each 6-byte MAC as the top 6 bytes of a 64-bit value. macBuffer[6..7] are
    // still zero from the initializer, so the two values compare like the MACs do.
    memcpy( &macBuffer[0], pbMacA, 6 );
    minMac = SYMCRYPT_LOAD_MSBFIRST64( macBuffer );
    memcpy( &macBuffer[0], pbMacB, 6 );
    maxMac = SYMCRYPT_LOAD_MSBFIRST64( macBuffer );

    if( minMac > maxMac )
    {
        // MAC values are public, no side-channel issues with this if()
        // Swap the two values
        minMac ^= maxMac;
        maxMac ^= minMac;
        minMac ^= maxMac;
    }

    // Now we write the two MACs into the buffer.
    // Note the slight overlap, and the use of 14 bytes rather than 12
    SYMCRYPT_STORE_MSBFIRST64( &macBuffer[0], maxMac );
    SYMCRYPT_STORE_MSBFIRST64( &macBuffer[6], minMac );     // This writes macBuffer[6] through macBuffer[13]

    // val = hmac( 0^n, maxMac || minMac ), using the MAC algorithm selected for the group
    // The HMAC key is a buffer of all zeros whose length equals the length of the digest from the hash function
    pMacAlgorithm->expandKeyFunc(&hmacKey, hmacKeyBytes, cbHMACOutputSize);

    pMacAlgorithm->initFunc( &hmacState, &hmacKey );
    pMacAlgorithm->appendFunc( &hmacState, macBuffer, 12 );
    pMacAlgorithm->resultFunc( &hmacState, valBytes );

    // val = val modulo (q - 1) + 1
    // piTmp = q - 1, which then becomes the modulus pmMod
    SymCryptIntSubUint32( SymCryptIntFromModulus(pCurve->GOrd), 1, piTmp );
    SymCryptIntToModulus( piTmp, pmMod, 1, SYMCRYPT_FLAG_DATA_PUBLIC, pbScratch, cbScratch );

    // These can only be allocated once pmMod has been set up.
    peVal = SymCryptModElementAllocate( pmMod );
    peTmp = SymCryptModElementAllocate( pmMod );

    if( peVal == NULL || peTmp == NULL )
    {
        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
        goto cleanup;
    }

    scError = SymCryptModElementSetValue( valBytes, cbHMACOutputSize, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pmMod, peVal, pbScratch, cbScratch );
    if( scError != SYMCRYPT_NO_ERROR )
    {
        goto cleanup;
    }

    SymCryptModElementSetValueUint32( 1, pmMod, peTmp, pbScratch, cbScratch );
    SymCryptModAdd( pmMod, peVal, peTmp, peVal, pbScratch, cbScratch );

    // piTmp is reused: it now carries val as the scalar for the multiplication below.
    SymCryptModElementToInt( pmMod, peVal, piTmp, pbScratch, cbScratch );

    scError = SymCryptEcpointSetValue( pCurve,
                                       pbPT,
                                       cbPT,
                                       SYMCRYPT_NUMBER_FORMAT_MSB_FIRST,
                                       SYMCRYPT_ECPOINT_FORMAT_XY,
                                       poPT,
                                       0,
                                       pbScratch,
                                       cbScratch );
    if( scError != SYMCRYPT_NO_ERROR )
    {
        goto cleanup;
    }

    // PWE = val * PT
    scError = SymCryptEcpointScalarMul( pCurve, piTmp, poPT, 0, pState->poPWE, pbScratch, cbScratch );
    if( scError != SYMCRYPT_NO_ERROR )
    {
        goto cleanup;
    }

    scError = SymCrypt802_11SaeCustomSetRandMask( pState, pbRand, cbRand, pbMask, cbMask, pbScratch, cbScratch );
    if( scError != SYMCRYPT_NO_ERROR )
    {
        goto cleanup;
    }

cleanup:

    // The mod elements are released before the modulus they were allocated against.
    if( peTmp != NULL )
    {
        SymCryptModElementFree( pmMod, peTmp );
        peTmp = NULL;
    }

    if( peVal != NULL )
    {
        SymCryptModElementFree( pmMod, peVal );
        peVal = NULL;
    }

    if( poPT != NULL )
    {
        SymCryptEcpointFree( pCurve, poPT );
        poPT = NULL;
    }

    if( pmMod != NULL )
    {
        SymCryptModulusFree( pmMod );
        pmMod = NULL;
    }

    if( piTmp != NULL )
    {
        SymCryptIntFree( piTmp );
        piTmp = NULL;
    }

    if( pbScratch != NULL )
    {
        SymCryptWipe( pbScratch, cbScratch );
        SymCryptCallbackFree( pbScratch );
        pbScratch = NULL;
    }

    // Must come after poPT is freed: Destroy releases the curve held in pState.
    if( scError != SYMCRYPT_NO_ERROR )
    {
        SymCrypt802_11SaeCustomDestroy( pState );
    }

    return scError;
}

//
// SymCrypt802_11SaeCustomInitH2E
//
// Fixed-size convenience wrapper: calls the generic routine with
// SYMCRYPT_SAE_GROUP_19, a 64-byte PT and 32-byte rand/mask buffers.
//
SYMCRYPT_ERROR
SymCrypt802_11SaeCustomInitH2E(
    _Out_                       PSYMCRYPT_802_11_SAE_CUSTOM_STATE   pState,
    _In_reads_( 64 )            PCBYTE                              pbPT,
    _In_reads_( 6 )             PCBYTE                              pbMacA,
    _In_reads_( 6 )             PCBYTE                              pbMacB,
    _Inout_updates_opt_( 32 )   PBYTE                               pbRand,
    _Inout_updates_opt_( 32 )   PBYTE                               pbMask )
{
    return SymCrypt802_11SaeCustomInitH2EGeneric( pState,
                                                  SYMCRYPT_SAE_GROUP_19,
                                                  pbPT,
                                                  64,
                                                  pbMacA,
                                                  pbMacB,
                                                  pbRand,
                                                  32,
                                                  pbMask,
                                                  32 );
}
); + } + + SymCryptWipeKnownSize( pState, sizeof( *pState ) ); +} + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCommitCreateGeneric( + _In_ PCSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _Out_writes_( cbCommitScalar ) PBYTE pbCommitScalar, + SIZE_T cbCommitScalar, + _Out_writes_( cbCommitElement ) PBYTE pbCommitElement, + SIZE_T cbCommitElement) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PSYMCRYPT_MODELEMENT peTmp = NULL; + PSYMCRYPT_INT piTmp = NULL; + PSYMCRYPT_ECPOINT poPoint = NULL; + PBYTE pbScratch = NULL; + SIZE_T cbScratch; + SIZE_T nDigits; + + PCSYMCRYPT_ECURVE pCurve = pState->pCurve; + + nDigits = SymCryptDigitsFromBits( pCurve->FModBitsize ); + cbScratch = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( pCurve ), + SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) ) ); + + pbScratch = SymCryptCallbackAlloc( cbScratch ); + + peTmp = SymCryptModElementAllocate( pCurve->GOrd ); + piTmp = SymCryptIntAllocate( SymCryptEcurveDigitsofScalarMultiplier( pCurve ) ); + poPoint = SymCryptEcpointAllocate( pCurve ); + + if( peTmp == NULL || piTmp == NULL || poPoint == NULL || pbScratch == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + SymCryptModAdd( pCurve->GOrd, pState->peRand, pState->peMask, peTmp, pbScratch, cbScratch ); + scError = SymCryptModElementGetValue( pCurve->GOrd, peTmp, pbCommitScalar, cbCommitScalar, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptModElementToInt( pCurve->GOrd, pState->peMask, piTmp, pbScratch, cbScratch ); + scError = SymCryptEcpointScalarMul( pCurve, + piTmp, + pState->poPWE, + 0, + poPoint, + pbScratch, + cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Now we have mask * PWE, but we need the negative... 
+ SymCryptEcpointNegate( pCurve, poPoint, (UINT32)-1, pbScratch, cbScratch ); + + scError = SymCryptEcpointGetValue( pCurve, + poPoint, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + SYMCRYPT_ECPOINT_FORMAT_XY, + pbCommitElement, + cbCommitElement, + 0, + pbScratch, + cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + + if( piTmp != NULL ) + { + SymCryptIntFree( piTmp ); + piTmp = NULL; + } + + if( peTmp != NULL ) + { + SymCryptModElementFree( pCurve->GOrd, peTmp ); + peTmp = NULL; + } + + if( poPoint != NULL ) + { + SymCryptEcpointFree( pCurve, poPoint ); + poPoint = NULL; + } + + if( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + pbScratch = NULL; + } + + return scError; +} + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCommitCreate( + _In_ PCSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _Out_writes_( 32 ) PBYTE pbCommitScalar, + _Out_writes_( 64 ) PBYTE pbCommitElement ) +{ + return SymCrypt802_11SaeCustomCommitCreateGeneric( pState, + pbCommitScalar, + 32, + pbCommitElement, + 64 ); +} + +SYMCRYPT_ERROR +SymCrypt802_11SaeCustomCommitProcessGeneric( + _In_ PCSYMCRYPT_802_11_SAE_CUSTOM_STATE pState, + _In_reads_( cbPeerCommitScalar ) PCBYTE pbPeerCommitScalar, + SIZE_T cbPeerCommitScalar, + _In_reads_( cbPeerCommitElement ) PCBYTE pbPeerCommitElement, + SIZE_T cbPeerCommitElement, + _Out_writes_( cbSharedSecret ) PBYTE pbSharedSecret, + SIZE_T cbSharedSecret, + _Out_writes_( cbScalarSum ) PBYTE pbScalarSum, + SIZE_T cbScalarSum ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PSYMCRYPT_ECURVE pCurve = pState->pCurve; + PSYMCRYPT_MODELEMENT peCommitScalarSum = NULL; + PSYMCRYPT_ECPOINT poPeerCommitElement = NULL; + PSYMCRYPT_ECPOINT poTmp = NULL; + PSYMCRYPT_INT piTmp = NULL; + UINT32 nDigits; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch; + + nDigits = SymCryptDigitsFromBits( pCurve->FModBitsize ); + cbScratch = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( 
//
// SymCrypt802_11SaeCustomCommitProcessGeneric
//
// Processes the peer's commit message:
//  - validates the peer scalar (1 < scalar < group order) and the peer element
//    (on the curve and not the zero point);
//  - shared secret = X coordinate of  rand * ( peerScalar * PWE + peerElement );
//  - scalar sum    = ( peerScalar + rand + mask ) mod group order.
// Both outputs are written MSB-first.
//
// Returns SYMCRYPT_INVALID_ARGUMENT when the peer values fail validation,
// SYMCRYPT_MEMORY_ALLOCATION_FAILURE when a temporary cannot be allocated,
// otherwise the error of the first failing primitive, or SYMCRYPT_NO_ERROR.
//
SYMCRYPT_ERROR
SymCrypt802_11SaeCustomCommitProcessGeneric(
    _In_                                PCSYMCRYPT_802_11_SAE_CUSTOM_STATE  pState,
    _In_reads_( cbPeerCommitScalar )    PCBYTE                              pbPeerCommitScalar,
                                        SIZE_T                              cbPeerCommitScalar,
    _In_reads_( cbPeerCommitElement )   PCBYTE                              pbPeerCommitElement,
                                        SIZE_T                              cbPeerCommitElement,
    _Out_writes_( cbSharedSecret )      PBYTE                               pbSharedSecret,
                                        SIZE_T                              cbSharedSecret,
    _Out_writes_( cbScalarSum )         PBYTE                               pbScalarSum,
                                        SIZE_T                              cbScalarSum )
{
    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;

    PSYMCRYPT_ECURVE pCurve = pState->pCurve;
    PSYMCRYPT_MODELEMENT peCommitScalarSum = NULL;
    PSYMCRYPT_ECPOINT poPeerCommitElement = NULL;
    PSYMCRYPT_ECPOINT poTmp = NULL;
    PSYMCRYPT_INT piTmp = NULL;
    UINT32 nDigits;

    PBYTE pbScratch = NULL;
    SIZE_T cbScratch;

    nDigits = SymCryptDigitsFromBits( pCurve->FModBitsize );
    cbScratch = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ),
                SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( pCurve ),
                SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ),
                              SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) ) ) );
    pbScratch = SymCryptCallbackAlloc( cbScratch );

    peCommitScalarSum = SymCryptModElementAllocate( pCurve->GOrd );
    poPeerCommitElement = SymCryptEcpointAllocate( pCurve );
    poTmp = SymCryptEcpointAllocate( pCurve );
    piTmp = SymCryptIntAllocate( SymCryptEcurveDigitsofScalarMultiplier( pCurve ) );

    if( pbScratch == NULL || peCommitScalarSum == NULL || poPeerCommitElement == NULL || poTmp == NULL || piTmp == NULL )
    {
        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
        goto cleanup;
    }

    // piTmp = peer commit value
    scError = SymCryptIntSetValue( pbPeerCommitScalar, cbPeerCommitScalar, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piTmp );
    if( scError != SYMCRYPT_NO_ERROR )
    {
        goto cleanup;
    }

    // The Standard requires a check that the Peer commit value must be 1 < peer-commit < r where r is the group order.
    if( !SymCryptIntIsLessThan( piTmp, SymCryptIntFromModulus( pCurve->GOrd ) ) ||
        SymCryptIntIsEqualUint32( piTmp, 0 ) ||
        SymCryptIntIsEqualUint32( piTmp, 1 ) )
    {
        scError = SYMCRYPT_INVALID_ARGUMENT;
        goto cleanup;
    }

    SymCryptIntToModElement( piTmp, pCurve->GOrd, peCommitScalarSum, pbScratch, cbScratch );

    // Now compute the sum of the scalar commit values
    // (peer scalar + our rand + our mask; our own commit scalar is rand + mask)
    SymCryptModAdd( pCurve->GOrd, peCommitScalarSum, pState->peRand, peCommitScalarSum, pbScratch, cbScratch );
    SymCryptModAdd( pCurve->GOrd, peCommitScalarSum, pState->peMask, peCommitScalarSum, pbScratch, cbScratch );

    scError = SymCryptEcpointSetValue( pCurve,
                                       pbPeerCommitElement,
                                       cbPeerCommitElement,
                                       SYMCRYPT_NUMBER_FORMAT_MSB_FIRST,
                                       SYMCRYPT_ECPOINT_FORMAT_XY,
                                       poPeerCommitElement,
                                       0,
                                       pbScratch,
                                       cbScratch );
    if( scError != SYMCRYPT_NO_ERROR )
    {
        goto cleanup;
    }

    // The EcPointSetValue routine returns an error if either coordinate is >= P.
    // We need to check that the point is on the curve and not the zero point of the curve
    // (The zero point is sometimes called the 'point at infinity'.)
    if( !SymCryptEcpointOnCurve( pCurve, poPeerCommitElement, pbScratch, cbScratch ) ||
        SymCryptEcpointIsZero( pCurve, poPeerCommitElement, pbScratch, cbScratch ) )
    {
        scError = SYMCRYPT_INVALID_ARGUMENT;
        goto cleanup;
    }


    // poTmp = peerScalar * PWE  (piTmp still holds the peer scalar here)
    scError = SymCryptEcpointScalarMul( pCurve,
                                        piTmp,
                                        pState->poPWE,
                                        0,
                                        poTmp,
                                        pbScratch,
                                        cbScratch );
    if( scError != SYMCRYPT_NO_ERROR )
    {
        goto cleanup;
    }

    // poTmp = peerScalar * PWE + peerElement
    SymCryptEcpointAdd( pCurve, poTmp, poPeerCommitElement, poTmp, 0, pbScratch, cbScratch );

    // piTmp is reused for our own rand; poTmp = rand * poTmp, computed in place
    SymCryptModElementToInt( pCurve->GOrd, pState->peRand, piTmp, pbScratch, cbScratch );
    scError = SymCryptEcpointScalarMul( pCurve,
                                        piTmp,
                                        poTmp,
                                        0,
                                        poTmp,
                                        pbScratch,
                                        cbScratch );
    if( scError != SYMCRYPT_NO_ERROR )
    {
        goto cleanup;
    }

    // Only the X coordinate is exported as the shared secret.
    scError = SymCryptEcpointGetValue( pCurve,
                                       poTmp,
                                       SYMCRYPT_NUMBER_FORMAT_MSB_FIRST,
                                       SYMCRYPT_ECPOINT_FORMAT_X,
                                       pbSharedSecret,
                                       cbSharedSecret,
                                       0,
                                       pbScratch,
                                       cbScratch );
    if( scError != SYMCRYPT_NO_ERROR )
    {
        goto cleanup;
    }

    scError = SymCryptModElementGetValue( pCurve->GOrd, peCommitScalarSum, pbScalarSum, cbScalarSum, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pbScratch, cbScratch );
    if( scError != SYMCRYPT_NO_ERROR )
    {
        goto cleanup;
    }

cleanup:

    if( peCommitScalarSum != NULL )
    {
        SymCryptModElementFree( pCurve->GOrd, peCommitScalarSum );
        peCommitScalarSum = NULL;
    }

    if( poPeerCommitElement != NULL )
    {
        SymCryptEcpointFree( pCurve, poPeerCommitElement );
        poPeerCommitElement = NULL;
    }

    if( poTmp != NULL )
    {
        SymCryptEcpointFree( pCurve, poTmp );
        poTmp = NULL;
    }

    if( piTmp != NULL )
    {
        SymCryptIntFree( piTmp );
        piTmp = NULL;
    }

    if( pbScratch != NULL )
    {
        SymCryptWipe( pbScratch, cbScratch );
        SymCryptCallbackFree( pbScratch );
        pbScratch = NULL;
    }

    return scError;
}

//
// SymCrypt802_11SaeCustomCommitProcess
//
// Fixed-size convenience wrapper: 32-byte scalars and shared secret, 64-byte element.
//
SYMCRYPT_ERROR
SymCrypt802_11SaeCustomCommitProcess(
    _In_                PCSYMCRYPT_802_11_SAE_CUSTOM_STATE  pState,
    _In_reads_( 32 )    PCBYTE                              pbPeerCommitScalar,
    _In_reads_( 64 )    PCBYTE                              pbPeerCommitElement,
    _Out_writes_( 32 )  PBYTE                               pbSharedSecret,
    _Out_writes_( 32 )  PBYTE                               pbScalarSum )
{
    return SymCrypt802_11SaeCustomCommitProcessGeneric( pState,
                                                        pbPeerCommitScalar,
                                                        32,
                                                        pbPeerCommitElement,
                                                        64,
                                                        pbSharedSecret,
                                                        32,
                                                        pbScalarSum,
                                                        32 );
}
1024-bit elements might interleave @ 64 bytes on an AVX512 +// capable CPU, but 256-bit elements would have to interleave at 16 or 32 bytes on that same CPU.) +// + +// Currently these are constants as that allows easier optimizations... +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 +#define SYMCRYPT_SCSTABLE_USE64 1 +#define SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE 32 +#define SYMCRYPT_SCSTABLE_GROUP_SIZE 4 +typedef UINT64 SYMCRYPT_SCSTABLE_TYPE; +#else +#define SYMCRYPT_SCSTABLE_USE64 0 +#define SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE 16 +#define SYMCRYPT_SCSTABLE_GROUP_SIZE 4 +typedef UINT32 SYMCRYPT_SCSTABLE_TYPE; +#endif + +UINT32 +SYMCRYPT_CALL +SymCryptScsTableInit( + _Out_ PSYMCRYPT_SCSTABLE pScsTable, + UINT32 nElements, + UINT32 elementSize ) +{ + UINT32 groupSize; + UINT32 interleaveSize; + UINT32 cbBuffer; + + SYMCRYPT_ASSERT( nElements > 0 ); + +#pragma warning( suppress: 4127 ) // conditional expression is constant + if( SYMCRYPT_CPU_AMD64 && elementSize == 128 ) + { + // Highly optimized assembler mode for 1024-bit entries for RSA-2048... + interleaveSize = 128; + groupSize = 1; + } else { + // Standard C implementation + interleaveSize = SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE; + groupSize = SYMCRYPT_SCSTABLE_GROUP_SIZE; + } + + // Right now, we limit ourselves to element sizes that are a multiple of the interleaveSize and + // # elements that are a multiple of the group size. 
+ // We also limit ourselves to sensible input sizes + SYMCRYPT_ASSERT( elementSize % interleaveSize == 0 && nElements % groupSize == 0 && (elementSize | nElements) < (1 << 16) && elementSize > 0 ); + + cbBuffer = elementSize * nElements; // Each factor is < 2^16, so there is no overflow in the mul + + pScsTable->groupSize = groupSize; + pScsTable->interleaveSize = interleaveSize; + pScsTable->nElements = nElements; + pScsTable->elementSize = elementSize; + pScsTable->cbTableData = cbBuffer; + pScsTable->pbTableData = NULL; + + return cbBuffer; +} + +VOID +SYMCRYPT_CALL +SymCryptScsTableSetBuffer( + _Inout_ PSYMCRYPT_SCSTABLE pScsTable, + _Inout_updates_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer ) +{ + SYMCRYPT_ASSERT(cbBuffer >= pScsTable->cbTableData); + UNREFERENCED_PARAMETER( cbBuffer ); + + pScsTable->pbTableData = pbBuffer; +} + + +C_ASSERT( SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE == 16 || SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE == 32 ); +// check that an interleave size is exactly 4 words +C_ASSERT( SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE == 4 * sizeof( SYMCRYPT_SCSTABLE_TYPE ) ); + +VOID +SYMCRYPT_CALL +SymCryptScsTableStoreC( + _Inout_ PSYMCRYPT_SCSTABLE pScsTable, + UINT32 iIndex, + _In_reads_bytes_( cbData ) PCBYTE pbData, + UINT32 cbData ) +{ + UINT32 groupSize = SYMCRYPT_SCSTABLE_GROUP_SIZE; + UINT32 interleaveSize = SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE; + UINT32 elementSize = pScsTable->elementSize; + UINT32 groupOffset; + + SYMCRYPT_ASSERT( groupSize == pScsTable->groupSize ); + SYMCRYPT_ASSERT( interleaveSize == pScsTable->interleaveSize ); + + SYMCRYPT_ASSERT( cbData == elementSize ); + UNREFERENCED_PARAMETER( cbData ); + + SYMCRYPT_ASSERT(iIndex < pScsTable->nElements); + + groupOffset = iIndex % groupSize; + + // dcl - document why this can't be an integer overflow + SYMCRYPT_SCSTABLE_TYPE * pDst = (SYMCRYPT_SCSTABLE_TYPE *) (pScsTable->pbTableData + (iIndex - groupOffset) * elementSize + groupOffset * interleaveSize); + SYMCRYPT_SCSTABLE_TYPE * pSrc = 
//
// SymCryptScsTableStoreC
//
// Portable store: copies one element into slot iIndex of the interleaved table.
// The element is written in chunks of interleaveSize bytes (4 words each); successive
// chunks of the same element are interleaveSize * groupSize bytes apart, leaving room
// for the matching chunks of the other elements of the group.
// The destination address depends on iIndex, so this is the non-side-channel-safe
// direction; only the Load routines scan the whole table.
//
VOID
SYMCRYPT_CALL
SymCryptScsTableStoreC(
    _Inout_                     PSYMCRYPT_SCSTABLE  pScsTable,
                                UINT32              iIndex,
    _In_reads_bytes_( cbData )  PCBYTE              pbData,
                                UINT32              cbData )
{
    UINT32 groupSize = SYMCRYPT_SCSTABLE_GROUP_SIZE;
    UINT32 interleaveSize = SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE;
    UINT32 elementSize = pScsTable->elementSize;
    UINT32 groupOffset;

    // This routine only understands the layout described by the compile-time constants.
    SYMCRYPT_ASSERT( groupSize == pScsTable->groupSize );
    SYMCRYPT_ASSERT( interleaveSize == pScsTable->interleaveSize );

    SYMCRYPT_ASSERT( cbData == elementSize );
    UNREFERENCED_PARAMETER( cbData );

    SYMCRYPT_ASSERT(iIndex < pScsTable->nElements);

    // Position of the element inside its group
    groupOffset = iIndex % groupSize;

    // No integer overflow here: iIndex < nElements (asserted above), and
    // SymCryptScsTableInit asserts that nElements and elementSize are both < 2^16, so
    // (iIndex - groupOffset) * elementSize < 2^32. groupOffset * interleaveSize is at
    // most (groupSize - 1) * interleaveSize, a small constant.
    // NOTE(review): this relies on the asserts; confirm whether they are active in release builds.
    SYMCRYPT_SCSTABLE_TYPE * pDst = (SYMCRYPT_SCSTABLE_TYPE *) (pScsTable->pbTableData + (iIndex - groupOffset) * elementSize + groupOffset * interleaveSize);
    SYMCRYPT_SCSTABLE_TYPE * pSrc = (SYMCRYPT_SCSTABLE_TYPE *) pbData;

    UINT32 nInterleaves = elementSize / interleaveSize;

    // do/while: at least one chunk is always copied (Init asserts elementSize > 0 and a
    // multiple of the interleave size).
    do
    {
        pDst[0] = pSrc[0];
        pDst[1] = pSrc[1];
        pDst[2] = pSrc[2];
        pDst[3] = pSrc[3];

        pDst += interleaveSize * groupSize / sizeof( *pDst );   // skip the other group members' chunks
        pSrc += interleaveSize / sizeof( *pSrc );
        nInterleaves--;
    } while( nInterleaves > 0 );

}

#if SYMCRYPT_CPU_AMD64
//
// SymCryptScsTableStore128Xmm
//
// Store for the 128-byte-element layout (groupSize == 1): elements are stored whole and
// consecutively, so slot iIndex starts at byte offset iIndex * 128. The element is copied
// as eight 16-byte values.
//
VOID
SYMCRYPT_CALL
SymCryptScsTableStore128Xmm(
    _Inout_                     PSYMCRYPT_SCSTABLE  pScsTable,
                                UINT32              iIndex,
    _In_reads_bytes_( cbData )  PCBYTE              pbData,
                                UINT32              cbData )
{
    __m128i * pDst = (__m128i *) (pScsTable->pbTableData + iIndex * 128);
    __m128i * pSrc = (__m128i *) pbData;

    SYMCRYPT_ASSERT( cbData == 128 && pScsTable->elementSize == 128 && iIndex < pScsTable->nElements && pScsTable->groupSize == 1 );
    UNREFERENCED_PARAMETER( cbData );

    pDst[0] = pSrc[0];
    pDst[1] = pSrc[1];
    pDst[2] = pSrc[2];
    pDst[3] = pSrc[3];
    pDst[4] = pSrc[4];
    pDst[5] = pSrc[5];
    pDst[6] = pSrc[6];
    pDst[7] = pSrc[7];
}
#endif // AMD64

//
// SymCryptScsTableLoadC
//
// Portable side-channel safe load: every group of the table is read, and the words of
// element iIndex are selected with masks rather than by address. If iIndex matches no
// element that is visited, no mask is set and the output is all zeroes.
//
VOID
SYMCRYPT_CALL
SymCryptScsTableLoadC(
    _In_                        PSYMCRYPT_SCSTABLE  pScsTable,
                                UINT32              iIndex,
    _Out_writes_bytes_(cbData)  PBYTE               pbData,
                                UINT32              cbData )
{
    UINT32 groupSize = SYMCRYPT_SCSTABLE_GROUP_SIZE;
    UINT32 interleaveSize = SYMCRYPT_SCSTABLE_INTERLEAVE_SIZE;
    UINT32 elementSize = pScsTable->elementSize;

    SYMCRYPT_SCSTABLE_TYPE mask0, mask1, mask2, mask3;
    UINT32 i;
    UINT32 j;
    UINT32 nElements = pScsTable->nElements;

    const SYMCRYPT_SCSTABLE_TYPE * pSrc = (SYMCRYPT_SCSTABLE_TYPE *) pScsTable->pbTableData;
    SYMCRYPT_SCSTABLE_TYPE * pDst = (SYMCRYPT_SCSTABLE_TYPE *) pbData;
    SYMCRYPT_SCSTABLE_TYPE * pD;

    UINT32 nInterleaves = elementSize / interleaveSize;


    SYMCRYPT_ASSERT( groupSize == pScsTable->groupSize );
    SYMCRYPT_ASSERT( interleaveSize == pScsTable->interleaveSize );

    SYMCRYPT_ASSERT( cbData >= sizeof( SYMCRYPT_SCSTABLE_TYPE ) * SYMCRYPT_SCSTABLE_GROUP_SIZE );
    SYMCRYPT_ASSERT( cbData == pScsTable->elementSize );
    UNREFERENCED_PARAMETER( cbData );

    // SCS_MASK_EQUAL32( a, b ) is all-ones when a == b and zero otherwise, as wide as
    // SYMCRYPT_SCSTABLE_TYPE. The macro stays defined after this function (no #undef).
#if SYMCRYPT_SCSTABLE_USE64
#define SCS_MASK_EQUAL32( _a, _b )  ( ~(UINT64) ((INT64) ((UINT64)0 - (_a ^ _b)) >> 32 ) )
#else
#define SCS_MASK_EQUAL32( _a, _b )  (SYMCRYPT_MASK32_EQ( _a, _b ))
#endif

    i = 0;

    // First group: one mask per element of the group.
    mask0 = SCS_MASK_EQUAL32( i+0, iIndex );
    mask1 = SCS_MASK_EQUAL32( i+1, iIndex );
    mask2 = SCS_MASK_EQUAL32( i+2, iIndex );
    mask3 = SCS_MASK_EQUAL32( i+3, iIndex );

    j = nInterleaves;
    pD = pDst;

    // The first pass assigns (=), so the output buffer needs no prior clearing.
    // pSrc[4k .. 4k+3] is the current chunk of the group's k-th element.
    do {
        pD[0] = (mask0 & pSrc[0]) | (mask1 & pSrc[4]) | (mask2 & pSrc[ 8]) | (mask3 & pSrc[12]);
        pD[1] = (mask0 & pSrc[1]) | (mask1 & pSrc[5]) | (mask2 & pSrc[ 9]) | (mask3 & pSrc[13]);
        pD[2] = (mask0 & pSrc[2]) | (mask1 & pSrc[6]) | (mask2 & pSrc[10]) | (mask3 & pSrc[14]);
        pD[3] = (mask0 & pSrc[3]) | (mask1 & pSrc[7]) | (mask2 & pSrc[11]) | (mask3 & pSrc[15]);
        pD += interleaveSize / sizeof( *pD );
        pSrc += interleaveSize * groupSize / sizeof( *pSrc );
        j--;
    } while( j > 0 );

    i += groupSize;

    // Remaining whole groups: same selection, OR-ed into the output. pSrc keeps
    // advancing across groups while pD restarts at the beginning of the output.
    while (i + groupSize <= nElements)
    {

        mask0 = SCS_MASK_EQUAL32( i+0, iIndex );
        mask1 = SCS_MASK_EQUAL32( i+1, iIndex );
        mask2 = SCS_MASK_EQUAL32( i+2, iIndex );
        mask3 = SCS_MASK_EQUAL32( i+3, iIndex );

        j = nInterleaves;
        pD = pDst;

        do {
            pD[0] |= (mask0 & pSrc[0]) | (mask1 & pSrc[4]) | (mask2 & pSrc[ 8]) | (mask3 & pSrc[12]);
            pD[1] |= (mask0 & pSrc[1]) | (mask1 & pSrc[5]) | (mask2 & pSrc[ 9]) | (mask3 & pSrc[13]);
            pD[2] |= (mask0 & pSrc[2]) | (mask1 & pSrc[6]) | (mask2 & pSrc[10]) | (mask3 & pSrc[14]);
            pD[3] |= (mask0 & pSrc[3]) | (mask1 & pSrc[7]) | (mask2 & pSrc[11]) | (mask3 & pSrc[15]);
            pD += interleaveSize / sizeof( *pD );
            pSrc += interleaveSize * groupSize / sizeof( *pSrc );
            j--;
        } while( j > 0 );

        i += groupSize;
    }
}
#if SYMCRYPT_CPU_AMD64
//
// SymCryptScsTableLoad128Xmm
//
// Side-channel safe load for the 128-byte-element layout (groupSize == 1).
// Every element of the table is read; a running counter is compared with iIndex to
// build an all-ones/all-zero mask, and the masked elements are OR-ed together in eight
// 16-byte accumulators that are written to pbData at the end.
//
VOID
SYMCRYPT_CALL
SymCryptScsTableLoad128Xmm(
    _In_                        PSYMCRYPT_SCSTABLE  pScsTable,
                                UINT32              iIndex,
    _Out_writes_bytes_(cbData)  PBYTE               pbData,
                                UINT32              cbData )
{
    UINT32 nElements = pScsTable->nElements;

    __m128i R0, R1, R2, R3, R4, R5, R6, R7;     // accumulators for the selected element
    __m128i T0, T1;

    __m128i Count = _mm_setzero_si128();                            // index of the element being read, in every lane
    __m128i Ones = _mm_set_epi32( 1, 1, 1, 1 );
    __m128i Entry = _mm_set_epi32( iIndex, iIndex, iIndex, iIndex );
    __m128i Mask;
    __m128i * pSrc = (__m128i *) pScsTable->pbTableData;
    __m128i * pDst = (__m128i *) pbData;

    SYMCRYPT_ASSERT( cbData == 128 && pScsTable->elementSize == 128 && iIndex < pScsTable->nElements && pScsTable->groupSize == 1 );
    UNREFERENCED_PARAMETER( cbData );

    // Element 0 initializes the accumulators ...
    Mask = _mm_cmpeq_epi32( Count, Entry );
    Count = _mm_add_epi32( Count, Ones );

    R0 = _mm_and_si128( Mask, pSrc[0] );
    R1 = _mm_and_si128( Mask, pSrc[1] );
    R2 = _mm_and_si128( Mask, pSrc[2] );
    R3 = _mm_and_si128( Mask, pSrc[3] );
    R4 = _mm_and_si128( Mask, pSrc[4] );
    R5 = _mm_and_si128( Mask, pSrc[5] );
    R6 = _mm_and_si128( Mask, pSrc[6] );
    R7 = _mm_and_si128( Mask, pSrc[7] );

    pSrc += 8;

    // ... and the remaining nElements - 1 elements are OR-ed in.
    // The pre-decrement relies on nElements >= 1, which SymCryptScsTableInit asserts.
    while( --nElements > 0 )
    {
        Mask = _mm_cmpeq_epi32( Count, Entry );
        Count = _mm_add_epi32( Count, Ones );

        T0 = _mm_and_si128( Mask, pSrc[0] ); R0 = _mm_or_si128( R0, T0 );
        T1 = _mm_and_si128( Mask, pSrc[1] ); R1 = _mm_or_si128( R1, T1 );
        T0 = _mm_and_si128( Mask, pSrc[2] ); R2 = _mm_or_si128( R2, T0 );
        T1 = _mm_and_si128( Mask, pSrc[3] ); R3 = _mm_or_si128( R3, T1 );
        T0 = _mm_and_si128( Mask, pSrc[4] ); R4 = _mm_or_si128( R4, T0 );
        T1 = _mm_and_si128( Mask, pSrc[5] ); R5 = _mm_or_si128( R5, T1 );
        T0 = _mm_and_si128( Mask, pSrc[6] ); R6 = _mm_or_si128( R6, T0 );
        T1 = _mm_and_si128( Mask, pSrc[7] ); R7 = _mm_or_si128( R7, T1 );
        pSrc += 8;
    }

    pDst[0] = R0;
    pDst[1] = R1;
    pDst[2] = R2;
    pDst[3] = R3;
    pDst[4] = R4;
    pDst[5] = R5;
    pDst[6] = R6;
    pDst[7] = R7;
}
#endif // AMD64

//
// SymCryptScsTableStore
//
// Stores one element at slot iIndex, dispatching on the table layout:
// on AMD64 a table of 128-byte elements uses the XMM routine, everything else
// uses the portable C routine.
//
VOID
SYMCRYPT_CALL
SymCryptScsTableStore(
    _Inout_                     PSYMCRYPT_SCSTABLE  pScsTable,
                                UINT32              iIndex,
    _In_reads_bytes_( cbData )  PCBYTE              pbData,
                                UINT32              cbData )
{
#if SYMCRYPT_CPU_AMD64

    // Same condition SymCryptScsTableInit uses to pick the 128-byte layout.
    if( pScsTable->elementSize == 128 )
    {
        SymCryptScsTableStore128Xmm( pScsTable, iIndex, pbData, cbData );
    } else {
        SymCryptScsTableStoreC( pScsTable, iIndex, pbData, cbData );
    }

#else

    SymCryptScsTableStoreC( pScsTable, iIndex, pbData, cbData );

#endif
}

//
// SymCryptScsTableLoad
//
// Loads the element at slot iIndex into pbData, dispatching on the table layout
// exactly like SymCryptScsTableStore.
//
VOID
SYMCRYPT_CALL
SymCryptScsTableLoad(
    _In_                        PSYMCRYPT_SCSTABLE  pScsTable,
                                UINT32              iIndex,
    _Out_writes_bytes_(cbData)  PBYTE               pbData,
                                UINT32              cbData )
{
    // This is the side-channel safe routine

#if SYMCRYPT_CPU_AMD64

    if( pScsTable->elementSize == 128 )
    {
        SymCryptScsTableLoad128Xmm( pScsTable, iIndex, pbData, cbData );
    } else {
        SymCryptScsTableLoadC( pScsTable, iIndex, pbData, cbData );
    }

#else

    SymCryptScsTableLoadC( pScsTable, iIndex, pbData, cbData );

#endif
}

//
// SymCryptScsTableWipe
//
// Wipes the table data buffer (cbTableData bytes). The buffer itself is not freed
// and the table structure is left untouched.
//
VOID
SYMCRYPT_CALL
SymCryptScsTableWipe(
    _Inout_ PSYMCRYPT_SCSTABLE pScsTable )
{
    SymCryptWipe( pScsTable->pbTableData, pScsTable->cbTableData );
}
+// + + +const SYMCRYPT_MODULAR_FUNCTIONS g_SymCryptModFns[] = { + SYMCRYPT_MOD_FUNCTIONS_FDEF_GENERIC, // Handles any type of modulus + SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY, // Montgomery, only for odd parity-public moduli + +#if 0 && SYMCRYPT_CPU_AMD64 + + SYMCRYPT_MOD_FUNCTIONS_FDEF369_MONTGOMERY, // optimized for 384 and 576-bit moduli + SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULX256, // Special faster code for 256-bit Montgomery moduli, MULX-based code + SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULXP384, // Special faster code for P-384 field modulus, MULX-based code + SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULX, // MULX-based code, for any size (digit size = 512 bits) + SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULX1024, // Special faster code for 1024-bit Montgomery moduli, MULX-based code + {NULL,}, + + // SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULXP256, // Special faster code for P-256 field modulus, MULX-based code + // SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULX384, // Special faster code for 384-bit Montgomery moduli, MULX-based code + // SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY256, // Special faster code for 256-bit Montgomery moduli + // SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY512, // Special faster code for 512-bit Montgomery moduli + // SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY1024, // Special faster code for 1024-bit Montgomery moduli + +#elif 0 && SYMCRYPT_CPU_ARM64 + + SYMCRYPT_MOD_FUNCTIONS_FDEF369_MONTGOMERY, + SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_ARM64256, + SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_ARM64P384, + {NULL,}, + {NULL,}, + {NULL,}, + +#endif +}; + +#define SymCryptModLabel(_label) (_label << 16) +#define SymCryptModFntableGeneric (SymCryptModLabel('gM') + (0 * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +#define SymCryptModFntableMontgomery (SymCryptModLabel('mM') + (1 * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +#define SymCryptModFntable369Montgomery (SymCryptModLabel('9m') + (2 * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +#define SymCryptModFntableMontgomeryMulx256 
(SymCryptModLabel('2x') + (3 * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +#define SymCryptModFntableMontgomeryMulxP384 (SymCryptModLabel('3n') + (4 * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +#define SymCryptModFntableMontgomeryMulx (SymCryptModLabel('xM') + (5 * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +#define SymCryptModFntableMontgomeryMulx1024 (SymCryptModLabel('1x') + (6 * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) + +#define SymCryptModFntableMontgomeryArm64256 (SymCryptModLabel('2m') + (3 * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +#define SymCryptModFntableMontgomeryArm64P384 (SymCryptModLabel('3n') + (4 * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) + +// #define SymCryptModFntableMontgomeryMulxP256 (SymCryptModLabel('2n') + (xx * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +// #define SymCryptModFntableMontgomeryMulx384 (SymCryptModLabel('3x') + (xx * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +// #define SymCryptModFntableMontgomery256 (SymCryptModLabel('2m') + (xx * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +// #define SymCryptModFntableMontgomery512 (SymCryptModLabel('5m') + (xx * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) +// #define SymCryptModFntableMontgomery1024 (SymCryptModLabel('1m') + (xx * SYMCRYPT_MODULAR_FUNCTIONS_SIZE)) + +C_ASSERT( (sizeof( g_SymCryptModFns ) & (sizeof( g_SymCryptModFns) - 1 )) == 0 ); // size of the table must be a power of 2 to be CFG-safe. + +const UINT32 g_SymCryptModFnsMask = sizeof( g_SymCryptModFns ) - sizeof( g_SymCryptModFns[0] ); + +// +// Tweaking the selection & function tables allows different tradeoffs of performance vs codesize +// +const SYMCRYPT_MODULUS_TYPE_SELECTION_ENTRY SymCryptModulusTypeSelections[] = +{ +#if 0 && SYMCRYPT_CPU_AMD64 + // Mulx used for 0-512 and 577-... 
bits + {SymCryptModFntableMontgomeryMulxP384, SYMCRYPT_CPU_FEATURES_FOR_MULX, 384, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY | SYMCRYPT_MODULUS_FEATURE_NISTP384 }, + {SymCryptModFntableMontgomeryMulx256, SYMCRYPT_CPU_FEATURES_FOR_MULX, 256, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntableMontgomeryMulx, SYMCRYPT_CPU_FEATURES_FOR_MULX, 512, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntable369Montgomery, 0, 384, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntableMontgomery, 0, 512, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntable369Montgomery, 0, 576, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntableMontgomeryMulx1024, SYMCRYPT_CPU_FEATURES_FOR_MULX, 1024, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntableMontgomeryMulx, SYMCRYPT_CPU_FEATURES_FOR_MULX, 0, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + +#elif 0 && SYMCRYPT_CPU_ARM64 + + {SymCryptModFntableMontgomeryArm64P384, 0, 384, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY | SYMCRYPT_MODULUS_FEATURE_NISTP384 }, + {SymCryptModFntableMontgomeryArm64256, 0, 256, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntable369Montgomery, 0, 384, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntableMontgomery, 0, 512, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntable369Montgomery, 0, 576, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + +#endif + + {SymCryptModFntableMontgomery, 0, 0, SYMCRYPT_MODULUS_FEATURE_MONTGOMERY }, + {SymCryptModFntableGeneric, 0, 0, 0 }, + // This last entry always matches, so the code never falls off the end of this table. +}; + + +// +// At the moment there is only the default number format. 
+// + +UINT32 +SymCryptDigitsFromBits( UINT32 nBits ) +{ + return SymCryptFdefDigitsFromBits( nBits ); +} + + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptIntAllocate( UINT32 nDigits ) +{ + return SymCryptFdefIntAllocate( nDigits ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntFree( _Out_ PSYMCRYPT_INT piObj ) +{ + SymCryptIntWipe( piObj ); + SymCryptCallbackFree( piObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofIntFromDigits( UINT32 nDigits ) +{ + return SymCryptFdefSizeofIntFromDigits( nDigits ); +} + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptIntCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ) +{ + return SymCryptFdefIntCreate( pbBuffer, cbBuffer, nDigits ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntWipe( _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_CHECK_MAGIC( piDst ); + + // Wipe the whole structure in one go; + SymCryptWipe( piDst, piDst->cbSize ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ) +{ + SymCryptFdefIntCopy( piSrc, piDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntMaskedCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Inout_ PSYMCRYPT_INT piDst, + UINT32 mask ) +{ + SymCryptFdefIntMaskedCopy( piSrc, piDst, mask ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntConditionalCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Inout_ PSYMCRYPT_INT piDst, + UINT32 cond ) +{ + SymCryptFdefIntConditionalCopy( piSrc, piDst, cond ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntConditionalSwap( + _Inout_ PSYMCRYPT_INT piSrc1, + _Inout_ PSYMCRYPT_INT piSrc2, + UINT32 cond ) +{ + SymCryptFdefIntConditionalSwap( piSrc1, piSrc2, cond ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntBitsizeOfObject( _In_ PCSYMCRYPT_INT piSrc ) +{ + return SymCryptFdefIntBitsizeOfObject( piSrc ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntDigitsizeOfObject( _In_ PCSYMCRYPT_INT piSrc ) +{ + return piSrc->nDigits; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptIntCopyMixedSize( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ) +{ + return 
SymCryptFdefIntCopyMixedSize( piSrc, piDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntBitsizeOfValue( _In_ PCSYMCRYPT_INT piSrc ) +{ + return SymCryptFdefIntBitsizeOfValue( piSrc ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntSetValueUint32( + UINT32 u32Src, + _Out_ PSYMCRYPT_INT piDst ) +{ + SymCryptFdefIntSetValueUint32( u32Src, piDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntSetValueUint64( + UINT64 u64Src, + _Out_ PSYMCRYPT_INT piDst ) +{ + SymCryptFdefIntSetValueUint64( u64Src, piDst ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptIntSetValue( + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + _Out_ PSYMCRYPT_INT piDst ) +{ + return SymCryptFdefIntSetValue( pbSrc, cbSrc, format, piDst ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptIntGetValue( + _In_ PCSYMCRYPT_INT piSrc, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format ) +{ + return SymCryptFdefIntGetValue( piSrc, pbDst, cbDst, format ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntGetValueLsbits32( _In_ PCSYMCRYPT_INT piSrc ) +{ + return SymCryptFdefIntGetValueLsbits32( piSrc ); +} + +UINT64 +SYMCRYPT_CALL +SymCryptIntGetValueLsbits64( _In_ PCSYMCRYPT_INT piSrc ) +{ + return SymCryptFdefIntGetValueLsbits64( piSrc ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntAddUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 u32Src2, + _Out_ PSYMCRYPT_INT piDst ) +{ + return SymCryptFdefIntAddUint32( piSrc1, u32Src2, piDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntAddSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ) +{ + return SymCryptFdefIntAddSameSize( piSrc1, piSrc2, piDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntAddMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ) +{ + return SymCryptFdefIntAddMixedSize( piSrc1, piSrc2, piDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntSubUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 u32Src2, + _Out_ PSYMCRYPT_INT 
piDst ) +{ + return SymCryptFdefIntSubUint32( piSrc1, u32Src2, piDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntSubSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ) +{ + return SymCryptFdefIntSubSameSize( piSrc1, piSrc2, piDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntSubMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ) +{ + return SymCryptFdefIntSubMixedSize( piSrc1, piSrc2, piDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntNeg( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ) +{ + SymCryptFdefIntNeg( piSrc, piDst ); +} + + +VOID +SYMCRYPT_CALL +SymCryptIntMulPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ) +{ + SymCryptFdefIntMulPow2( piSrc, exp, piDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntDivPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ) +{ + SymCryptFdefIntDivPow2( piSrc, exp, piDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntShr1( + UINT32 highestBit, + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ) +{ + SymCryptFdefIntShr1( highestBit, piSrc, piDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntModPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ) +{ + SymCryptFdefIntModPow2( piSrc, exp, piDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntGetBit( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 iBit ) +{ + return SymCryptFdefIntGetBit( piSrc, iBit ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntGetBits( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 iBit, + UINT32 nBits ) +{ + return SymCryptFdefIntGetBits( piSrc, iBit, nBits ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntSetBits( + _In_ PSYMCRYPT_INT piDst, + UINT32 value, + UINT32 iBit, + UINT32 nBits ) +{ + SymCryptFdefIntSetBits( piDst, value, iBit, nBits ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntIsEqualUint32( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ UINT32 u32Src2 ) +{ + return SymCryptFdefIntIsEqualUint32( piSrc1, u32Src2 ); +} + +UINT32 
+SYMCRYPT_CALL +SymCryptIntIsEqual( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2 ) +{ + return SymCryptFdefIntIsEqual( piSrc1, piSrc2 ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntIsLessThan( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2 ) +{ + return SymCryptFdefIntIsLessThan( piSrc1, piSrc2 ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntMulUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 Src2, + _Out_ PSYMCRYPT_INT piDst ) +{ + return SymCryptFdefIntMulUint32( piSrc1, Src2, piDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntMulSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefIntMulSameSize( piSrc1, piSrc2, piDst, pbScratch, cbScratch ); +} + + +VOID +SYMCRYPT_CALL +SymCryptIntSquare( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefIntSquare( piSrc, piDst, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntMulMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefIntMulMixedSize( piSrc1, piSrc2, piDst, pbScratch, cbScratch ); +} + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptDivisorAllocate( UINT32 nDigits ) +{ + return SymCryptFdefDivisorAllocate( nDigits ); +} + +VOID +SYMCRYPT_CALL +SymCryptDivisorFree( _Out_ PSYMCRYPT_DIVISOR pdObj ) +{ + SymCryptDivisorWipe( pdObj ); + SymCryptCallbackFree( pdObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofDivisorFromDigits( UINT32 nDigits ) +{ + return SymCryptFdefSizeofDivisorFromDigits( nDigits ); +} + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptDivisorCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ) +{ + return SymCryptFdefDivisorCreate( pbBuffer, cbBuffer, nDigits ); +} + +VOID 
+SYMCRYPT_CALL +SymCryptDivisorWipe( _Out_ PSYMCRYPT_DIVISOR pdObj ) +{ + SYMCRYPT_CHECK_MAGIC( pdObj ); + + SymCryptWipe( pdObj, pdObj->cbSize ); +} + +VOID +SymCryptDivisorCopy( + _In_ PCSYMCRYPT_DIVISOR pdSrc, + _Out_ PSYMCRYPT_DIVISOR pdDst ) +{ + SymCryptFdefDivisorCopy( pdSrc, pdDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptDivisorDigitsizeOfObject( _In_ PCSYMCRYPT_DIVISOR pdSrc ) +{ + return pdSrc->nDigits; +} + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptIntFromDivisor( _In_ PSYMCRYPT_DIVISOR pdSrc ) +{ + return SymCryptFdefIntFromDivisor( pdSrc ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntToDivisor( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_DIVISOR pdDst, + UINT32 totalOperations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefIntToDivisor( piSrc, pdDst, totalOperations, flags, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntDivMod( + _In_ PCSYMCRYPT_INT piSrc, + _In_ PCSYMCRYPT_DIVISOR pdDivisor, + _Out_opt_ PSYMCRYPT_INT piQuotient, + _Out_opt_ PSYMCRYPT_INT piRemainder, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefIntDivMod( piSrc, pdDivisor, piQuotient, piRemainder, pbScratch, cbScratch ); +} + + +PSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptModulusAllocate( UINT32 nDigits ) +{ + return SymCryptFdefModulusAllocate( nDigits ); +} + +VOID +SYMCRYPT_CALL +SymCryptModulusFree( _Out_ PSYMCRYPT_MODULUS pmObj ) +{ + SymCryptFdefModulusFree( pmObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofModulusFromDigits( UINT32 nDigits ) +{ + return SymCryptFdefSizeofModulusFromDigits( nDigits ); +} + +PSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptModulusCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ) +{ + return SymCryptFdefModulusCreate( pbBuffer, cbBuffer, nDigits ); +} + +VOID +SYMCRYPT_CALL +SymCryptModulusWipe( _Out_ PSYMCRYPT_MODULUS pmObj ) +{ + SYMCRYPT_CHECK_MAGIC( pmObj ); + + SymCryptWipe( pmObj, pmObj->cbSize 
); +} + +VOID +SymCryptModulusCopy( + _In_ PCSYMCRYPT_MODULUS pmSrc, + _Out_ PSYMCRYPT_MODULUS pmDst ) +{ + SymCryptFdefModulusCopy( pmSrc, pmDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptModulusDigitsizeOfObject( _In_ PCSYMCRYPT_MODULUS pmSrc ) +{ + return pmSrc->nDigits; +} + +PSYMCRYPT_MODELEMENT +SYMCRYPT_CALL +SymCryptModElementAllocate( _In_ PCSYMCRYPT_MODULUS pmMod ) +{ + return SymCryptFdefModElementAllocate( pmMod ); +} + +VOID +SYMCRYPT_CALL +SymCryptModElementFree( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peObj ) +{ + SymCryptFdefModElementFree( pmMod, peObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofModElementFromModulus( PCSYMCRYPT_MODULUS pmMod ) +{ + return SymCryptFdefSizeofModElementFromModulus( pmMod ); +} + +PSYMCRYPT_MODELEMENT +SYMCRYPT_CALL +SymCryptModElementCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _In_ PCSYMCRYPT_MODULUS pmMod ) +{ + return SymCryptFdefModElementCreate( pbBuffer, cbBuffer, pmMod ); +} + +VOID +SYMCRYPT_CALL +SymCryptModElementWipe( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst ) +{ + SymCryptFdefModElementWipe( pmMod, peDst ); +} + +VOID +SymCryptModElementCopy( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst ) +{ + SymCryptFdefModElementCopy( pmMod, peSrc, peDst ); +} + +VOID +SymCryptModElementMaskedCopy( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 mask ) +{ + SymCryptFdefModElementMaskedCopy( pmMod, peSrc, peDst, mask ); +} + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptDivisorFromModulus( _In_ PSYMCRYPT_MODULUS pmSrc ) +{ + return SymCryptFdefDivisorFromModulus( pmSrc ); +} + +VOID +SymCryptModElementConditionalSwap( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peData1, + _Inout_ PSYMCRYPT_MODELEMENT peData2, + _In_ UINT32 cond ) +{ + SymCryptFdefModElementConditionalSwap( pmMod, peData1, peData2, 
cond ); +} + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptIntFromModulus( _In_ PSYMCRYPT_MODULUS pmSrc ) +{ + return SymCryptFdefIntFromModulus( pmSrc ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntToModulus( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_MODULUS pmDst, + UINT32 averageOperations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PSYMCRYPT_INT piSrcTweak = (PSYMCRYPT_INT) piSrc; + + // In CHKed builds, we verify that a modulus flagged as prime (SYMCRYPT_FLAG_MODULUS_PRIME) is 2 or odd. + // (Some inversion algorithms fail hard when one input isn't 2 or odd.) + // We are constant-time w.r.t. piSrc being odd or =2. We don't hide the size of any input, + // but inputs 2 and 3 are handled with the same code path. + SYMCRYPT_ASSERT( ((flags & SYMCRYPT_FLAG_MODULUS_PRIME) == 0) || + (((SymCryptIntGetValueLsbits32( piSrc ) & 1) | SymCryptIntIsEqualUint32( piSrc, 2 )) != 0) ); + + SymCryptFdefIntToModulus( piSrcTweak, pmDst, averageOperations, flags, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptIntToModElement( + _In_ PCSYMCRYPT_INT piSrc, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefIntToModElement( piSrc, pmMod, peDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptModElementToInt( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PCUINT32 pData; + + SYMCRYPT_ASSERT( piDst->nDigits >= pmMod->nDigits ); + + pData = SYMCRYPT_MOD_CALL( pmMod ) modPreGet( pmMod, peSrc, pbScratch, cbScratch ); + + SymCryptFdefModElementToIntGeneric( pmMod, pData, piDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModElementSetValue( + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + 
PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError; + + scError = SymCryptFdefModElementSetValueGeneric( pbSrc, cbSrc, format, pmMod, peDst, pbScratch, cbScratch ); + + if( scError == SYMCRYPT_NO_ERROR ) + { + SYMCRYPT_MOD_CALL( pmMod ) modSetPost( pmMod, peDst, pbScratch, cbScratch ); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModElementGetValue( + PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SymCryptFdefModElementGetValue( pmMod, peSrc, pbDst, cbDst, format, pbScratch, cbScratch ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptModElementIsEqual( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2 ) +{ + return SymCryptFdefModElementIsEqual( pmMod, peSrc1, peSrc2 ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptModElementIsZero( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc ) +{ + return SymCryptFdefModElementIsZero( pmMod, peSrc ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptModAdd( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_MOD_CALL( pmMod ) modAdd( pmMod, peSrc1, peSrc2, peDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptModSub( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_MOD_CALL( pmMod ) modSub( pmMod, peSrc1, peSrc2, peDst, pbScratch, cbScratch ); +} + + 
+SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptModMul( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_MOD_CALL( pmMod ) modMul( pmMod, peSrc1, peSrc2, peDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptModSquare( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_MOD_CALL( pmMod ) modSquare( pmMod, peSrc, peDst, pbScratch, cbScratch ); +} + + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptModNeg( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_MOD_CALL( pmMod ) modNeg( pmMod, peSrc, peDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptModElementSetValueUint32( + UINT32 value, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefModElementSetValueUint32Generic( value, pmMod, peDst, pbScratch, cbScratch ); + + SYMCRYPT_MOD_CALL( pmMod ) modSetPost( pmMod, peDst, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptModElementSetValueNegUint32( + UINT32 value, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefModElementSetValueNegUint32( value, pmMod, peDst, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptModDivPow2( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ 
+ SymCryptFdefModDivPow2( pmMod, peSrc, exp, peDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModInv( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SYMCRYPT_MOD_CALL( pmMod ) modInv( pmMod, peSrc, peDst, flags, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptModExp( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peBase, + _In_ PCSYMCRYPT_INT piExp, + UINT32 nBitsExp, + UINT32 flags, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptModExpGeneric( pmMod, peBase, piExp, nBitsExp, flags, peDst, pbScratch, cbScratch ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModMultiExp( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_reads_( nBases ) PCSYMCRYPT_MODELEMENT * peBaseArray, + _In_reads_( nBases ) PCSYMCRYPT_INT * piExpArray, + UINT32 nBases, + UINT32 nBitsExp, + UINT32 flags, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SymCryptModMultiExpGeneric( pmMod, peBaseArray, piExpArray, nBases, nBitsExp, flags, peDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptModSetRandom( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefModSetRandomGeneric( pmMod, peDst, flags, pbScratch, cbScratch ); + + SYMCRYPT_MOD_CALL( pmMod ) modSetPost( pmMod, peDst, pbScratch, cbScratch ); +} + +PCSYMCRYPT_TRIALDIVISION_CONTEXT +SYMCRYPT_CALL +SymCryptCreateTrialDivisionContext( UINT32 nDigits ) +{ + return SymCryptFdefCreateTrialDivisionContext( nDigits ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptIntFindSmallDivisor( + _In_ PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext, + _In_ 
PCSYMCRYPT_INT piSrc, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SymCryptFdefIntFindSmallDivisor( pContext, piSrc, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptFreeTrialDivisionContext( PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext ) +{ + SymCryptFdefFreeTrialDivisionContext( pContext ); +} diff --git a/libs/symcrypt/lib/aes-asm.c b/libs/symcrypt/lib/aes-asm.c new file mode 100644 index 00000000000..bd49d2e549a --- /dev/null +++ b/libs/symcrypt/lib/aes-asm.c @@ -0,0 +1,46 @@ +// +// aes-asm.c code for AES implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + + +#include "precomp.h" + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM + +VOID +SYMCRYPT_CALL +SymCryptAesEcbEncryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + while( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptAesEncryptAsm( pExpandedKey, pbSrc, pbDst ); + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + } +} + +VOID +SYMCRYPT_CALL +SymCryptAesEcbDecryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + while( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptAesDecryptAsm( pExpandedKey, pbSrc, pbDst ); + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + } +} + +#endif diff --git a/libs/symcrypt/lib/aes-c.c b/libs/symcrypt/lib/aes-c.c new file mode 100644 index 00000000000..f2e22438487 --- /dev/null +++ b/libs/symcrypt/lib/aes-c.c @@ -0,0 +1,468 @@ +// +// aes-c.c code for AES implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// +// The fast-ish C implementation of the core AES functions +// +// Separate C file because at some point we want to be able to switch this out with a compact-C implementation +// that is smaller. +// + +#include "precomp.h" + +// +// Static vs. dynamically generated tables. +// +// AES uses about 13 kB of tables; it turns out that most of these tables can be generated +// algorithmically much faster than they can be read off the disk. +// This implementation does not do so. +// The reason is that generated tables live in the modifiable data segment, which means +// that they are not shared between different instances of a DLL. +// Static tables are shared. Especially for applications that have a very large number +// of processes (e.g. Terminal Servers) the extra cost of generating and storing a +// per-process copy of these tables is higher than the cost of loading it a few times +// from disk. +// Earlier versions of this implementation did generate the tables dynamically and ran into +// this very problem. +// +// Our tables are aligned to eliminate side-channels from TLB lookups if the TLB page size +// is big enough. For example, the SboxMatrixMult table is 1024-aligned. Each use of that +// table consists of 4 lookups, and each lookup is within its own 1kB aligned subtable. +// The side-channels from cache lines still remain, of course. +// + +//extern BYTE SymCryptAesSbox[256]; // Basic S-box, not used +extern SYMCRYPT_ALIGN_AT( 256) BYTE SymCryptAesInvSbox[256]; // For final round in decryption +extern SYMCRYPT_ALIGN_AT(1024) BYTE SymCryptAesSboxMatrixMult[4][256][4]; // Main encryption tables +extern SYMCRYPT_ALIGN_AT(1024) BYTE SymCryptAesInvSboxMatrixMult[4][256][4];// Main decryption tables +extern SYMCRYPT_ALIGN_AT(1024) BYTE SymCryptAesInvMatrixMult[4][256][4]; // For computing decryption round keys + +// +// Throughout this implementation we use UINT32s to access byte arrays. 
The AES +// algorithm almost requires this; without it the performance would be abysmal. +// All data elements are SYMCRYPT_ALIGNed, which must be at least 4. +// + +// +// Macro to check for alignment to support platforms that need alignment fix-ups. +// +#define IS_UINT32_ALIGNED( __p ) ((((intptr_t)__p) & 3) == 0) + +// +// Only need to enforce alignment on platforms that are not x86 or x64 +// Future improvement: should switch to using unaligned pointer accesses +// on some platforms. +// +#define NEED_ALIGN (!(SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64)) + + +VOID +SYMCRYPT_CALL +SymCryptAes4SboxC( + _In_reads_(4) PCBYTE pIn, + _Out_writes_(4) PBYTE pOut ) +// +// Perform 4 S-box lookups. +// This is a separate function as it can be done side-channel safe using +// AES-NI. +// Key expansion can actually be improved a lot more with AES-NI, but that +// requires major code changes for which we don't have time right now. +// +{ + pOut[0] = SymCryptAesSboxMatrixMult[0][pIn[0]][1]; + pOut[1] = SymCryptAesSboxMatrixMult[0][pIn[1]][1]; + pOut[2] = SymCryptAesSboxMatrixMult[0][pIn[2]][1]; + pOut[3] = SymCryptAesSboxMatrixMult[0][pIn[3]][1]; +} + +VOID +SYMCRYPT_CALL +SymCryptAesCreateDecryptionRoundKeyC( + _In_reads_(16) PCBYTE pEncryptionRoundKey, + _Out_writes_(16) PBYTE pDecryptionRoundKey ) +// +// Convert an encryption round key to a decryption round key by applying the inverse +// mixcolumn function to each 4-byte subword. +// This is a separate function as with AES-NI there is an assembler version of this +// function that is side-channel safe. 
+// +{ + int i; + PBYTE p = pDecryptionRoundKey; + PCBYTE q = pEncryptionRoundKey; + + for( i=0; i<4; i++ ) { + *(UINT32 *)p = + *(UINT32 *)SymCryptAesInvMatrixMult[0][q[0]] ^ + *(UINT32 *)SymCryptAesInvMatrixMult[1][q[1]] ^ + *(UINT32 *)SymCryptAesInvMatrixMult[2][q[2]] ^ + *(UINT32 *)SymCryptAesInvMatrixMult[3][q[3]]; + p += 4; + q += 4; + } + +} + +// +// SymCryptAesEncrypt +// NOINLINE prevents the compiler from creating additional implementations +// that have to be FIPS selftested. +// +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptAesEncryptC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(SYMCRYPT_AES_BLOCK_SIZE) PCBYTE pbPlaintext, + _Out_writes_(SYMCRYPT_AES_BLOCK_SIZE) PBYTE pbCiphertext ) +{ + SYMCRYPT_ALIGN BYTE state[4][4] = { 0 }; + SYMCRYPT_ALIGN UINT32 state2[4] = { 0 }; + + const BYTE (*keyPtr)[4][4]; + const BYTE (*keyLimit)[4][4]; + +#if NEED_ALIGN + SYMCRYPT_ALIGN BYTE alignBuffer[SYMCRYPT_AES_BLOCK_SIZE]; +#endif + +#if NEED_ALIGN + + // + // Callers who don't have their buffers aligned don't care about speed, + // so we do this in the simplest way. + // + if( !(IS_UINT32_ALIGNED( pbPlaintext ) & IS_UINT32_ALIGNED( pbCiphertext )) ) { + memcpy( alignBuffer, pbPlaintext, SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptAesEncrypt( pExpandedKey, alignBuffer, alignBuffer ); + memcpy( pbCiphertext, alignBuffer, SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptWipeKnownSize( alignBuffer, sizeof( alignBuffer ) ); + return; + } +#endif + + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + // + // From this point on all our data is UINT32 aligned or better on those + // platforms that have alignment restrictions. 
+ // + + keyPtr = &pExpandedKey->RoundKey[0]; // First round key + keyLimit = &pExpandedKey->lastEncRoundKey[0]; // Last round key + + // Initial round (AddRoundKey) + *((UINT32 *) &state[0][0]) = *(UINT32 *) (*keyPtr)[0] ^ *(UINT32 *) &pbPlaintext[0]; + *((UINT32 *) &state[1][0]) = *(UINT32 *) (*keyPtr)[1] ^ *(UINT32 *) &pbPlaintext[4]; + *((UINT32 *) &state[2][0]) = *(UINT32 *) (*keyPtr)[2] ^ *(UINT32 *) &pbPlaintext[8]; + *((UINT32 *) &state[3][0]) = *(UINT32 *) (*keyPtr)[3] ^ *(UINT32 *) &pbPlaintext[12]; + + keyPtr += 1; + + // Main rounds + while (keyPtr < keyLimit) + { + + // SubBytes/ShiftRows/MixColumns for col. 0 + state2[0] = *((UINT32 *) &SymCryptAesSboxMatrixMult[0][ state[0][0] ]); + state2[3] = *((UINT32 *) &SymCryptAesSboxMatrixMult[1][ state[0][1] ]); + state2[2] = *((UINT32 *) &SymCryptAesSboxMatrixMult[2][ state[0][2] ]); + state2[1] = *((UINT32 *) &SymCryptAesSboxMatrixMult[3][ state[0][3] ]); + + // SubBytes/ShiftRows/MixColumns for col. 1 + state2[1] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[0][ state[1][0] ]); + state2[0] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[1][ state[1][1] ]); + state2[3] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[2][ state[1][2] ]); + state2[2] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[3][ state[1][3] ]); + + // SubBytes/ShiftRows/MixColumns for col. 2 + state2[2] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[0][ state[2][0] ]); + state2[1] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[1][ state[2][1] ]); + state2[0] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[2][ state[2][2] ]); + state2[3] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[3][ state[2][3] ]); + + // SubBytes/ShiftRows/MixColumns for col. 
3 + state2[3] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[0][ state[3][0] ]); + state2[2] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[1][ state[3][1] ]); + state2[1] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[2][ state[3][2] ]); + state2[0] ^= *((UINT32 *) &SymCryptAesSboxMatrixMult[3][ state[3][3] ]); + + // AddRoundKey + *((UINT32 *) &state[0][0]) = *(UINT32 *) (*keyPtr)[0] ^ state2[0]; + *((UINT32 *) &state[1][0]) = *(UINT32 *) (*keyPtr)[1] ^ state2[1]; + *((UINT32 *) &state[2][0]) = *(UINT32 *) (*keyPtr)[2] ^ state2[2]; + *((UINT32 *) &state[3][0]) = *(UINT32 *) (*keyPtr)[3] ^ state2[3]; + + keyPtr += 1; + } + + // Final round + + // SubBytes/ShiftRows for col. 0 + state2[0] = (UINT32) SymCryptAesSboxMatrixMult[0][ state[0][0] ][1]; + state2[3] = (UINT32) SymCryptAesSboxMatrixMult[0][ state[0][1] ][1] << 8; + state2[2] = (UINT32) SymCryptAesSboxMatrixMult[0][ state[0][2] ][1] << 16; + state2[1] = (UINT32) SymCryptAesSboxMatrixMult[0][ state[0][3] ][1] << 24; + + // SubBytes/ShiftRows for col. 1 + state2[1] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[1][0] ][1]; + state2[0] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[1][1] ][1] << 8; + state2[3] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[1][2] ][1] << 16; + state2[2] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[1][3] ][1] << 24; + + // SubBytes/ShiftRows for col. 2 + state2[2] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[2][0] ][1]; + state2[1] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[2][1] ][1] << 8; + state2[0] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[2][2] ][1] << 16; + state2[3] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[2][3] ][1] << 24; + + // SubBytes/ShiftRows for col. 
3 + state2[3] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[3][0] ][1]; + state2[2] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[3][1] ][1] << 8; + state2[1] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[3][2] ][1] << 16; + state2[0] |= (UINT32) SymCryptAesSboxMatrixMult[0][ state[3][3] ][1] << 24; + + // AddRoundKey + *((UINT32 *) &pbCiphertext[0 ]) = *(UINT32 *) (*keyPtr)[0] ^ state2[0]; + *((UINT32 *) &pbCiphertext[4 ]) = *(UINT32 *) (*keyPtr)[1] ^ state2[1]; + *((UINT32 *) &pbCiphertext[8 ]) = *(UINT32 *) (*keyPtr)[2] ^ state2[2]; + *((UINT32 *) &pbCiphertext[12]) = *(UINT32 *) (*keyPtr)[3] ^ state2[3]; + + SymCryptWipeKnownSize( state, sizeof( state ) ); + SymCryptWipeKnownSize( state2, sizeof( state2 ) ); + + return; +} + + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptAesDecryptC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(SYMCRYPT_AES_BLOCK_SIZE) PCBYTE pbCiphertext, + _Out_writes_(SYMCRYPT_AES_BLOCK_SIZE) PBYTE pbPlaintext ) +{ + SYMCRYPT_ALIGN BYTE state[4][4] = { 0 }; + SYMCRYPT_ALIGN UINT32 state2[4] = { 0 }; + + const BYTE (*keyPtr)[4][4]; + const BYTE (*keyLimit)[4][4]; + +#if NEED_ALIGN + SYMCRYPT_ALIGN BYTE alignBuffer[SYMCRYPT_AES_BLOCK_SIZE]; +#endif + +#if NEED_ALIGN + // + // Callers who don't have their buffers aligned don't care about speed, + // so we do this in the simplest way. 
+ // + if( !(IS_UINT32_ALIGNED( pbPlaintext ) & IS_UINT32_ALIGNED( pbCiphertext )) ) { + memcpy( alignBuffer, pbCiphertext, SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptAesDecrypt( pExpandedKey, alignBuffer, alignBuffer ); + memcpy( pbPlaintext, alignBuffer, SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptWipeKnownSize( alignBuffer, sizeof( alignBuffer ) ); + return; + } +#endif + + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + keyPtr = &pExpandedKey->lastEncRoundKey[0]; // First round key + keyLimit = &pExpandedKey->lastDecRoundKey[0]; // Last round key + + // Initial round (AddRoundKey) + *((UINT32 *) &state[0][0]) = *(UINT32 *) (*keyPtr)[0] ^ *(UINT32 *) &pbCiphertext[0]; + *((UINT32 *) &state[1][0]) = *(UINT32 *) (*keyPtr)[1] ^ *(UINT32 *) &pbCiphertext[4]; + *((UINT32 *) &state[2][0]) = *(UINT32 *) (*keyPtr)[2] ^ *(UINT32 *) &pbCiphertext[8]; + *((UINT32 *) &state[3][0]) = *(UINT32 *) (*keyPtr)[3] ^ *(UINT32 *) &pbCiphertext[12]; + + keyPtr += 1; + + // Main rounds + while (keyPtr < keyLimit) + { + + // SubBytes/ShiftRows/MixColumns for col. 0 + state2[0] = *((UINT32 *) &SymCryptAesInvSboxMatrixMult[0][ state[0][0] ]); + state2[1] = *((UINT32 *) &SymCryptAesInvSboxMatrixMult[1][ state[0][1] ]); + state2[2] = *((UINT32 *) &SymCryptAesInvSboxMatrixMult[2][ state[0][2] ]); + state2[3] = *((UINT32 *) &SymCryptAesInvSboxMatrixMult[3][ state[0][3] ]); + + // SubBytes/ShiftRows/MixColumns for col. 1 + state2[1] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[0][ state[1][0] ]); + state2[2] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[1][ state[1][1] ]); + state2[3] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[2][ state[1][2] ]); + state2[0] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[3][ state[1][3] ]); + + // SubBytes/ShiftRows/MixColumns for col. 
2 + state2[2] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[0][ state[2][0] ]); + state2[3] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[1][ state[2][1] ]); + state2[0] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[2][ state[2][2] ]); + state2[1] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[3][ state[2][3] ]); + + // SubBytes/ShiftRows/MixColumns for col. 3 + state2[3] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[0][ state[3][0] ]); + state2[0] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[1][ state[3][1] ]); + state2[1] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[2][ state[3][2] ]); + state2[2] ^= *((UINT32 *) &SymCryptAesInvSboxMatrixMult[3][ state[3][3] ]); + + // AddRoundKey + *((UINT32 *) &state[0][0]) = *(UINT32 *) (*keyPtr)[0] ^ state2[0]; + *((UINT32 *) &state[1][0]) = *(UINT32 *) (*keyPtr)[1] ^ state2[1]; + *((UINT32 *) &state[2][0]) = *(UINT32 *) (*keyPtr)[2] ^ state2[2]; + *((UINT32 *) &state[3][0]) = *(UINT32 *) (*keyPtr)[3] ^ state2[3]; + + keyPtr += 1; + } + + // Final round + + // SubBytes/ShiftRows for col. 0 + state2[0] = (UINT32) SymCryptAesInvSbox[ state[0][0] ]; + state2[1] = (UINT32) SymCryptAesInvSbox[ state[0][1] ] << 8; + state2[2] = (UINT32) SymCryptAesInvSbox[ state[0][2] ] << 16; + state2[3] = (UINT32) SymCryptAesInvSbox[ state[0][3] ] << 24; + + // SubBytes/ShiftRows for col. 1 + state2[1] |= (UINT32) SymCryptAesInvSbox[ state[1][0] ]; + state2[2] |= (UINT32) SymCryptAesInvSbox[ state[1][1] ] << 8; + state2[3] |= (UINT32) SymCryptAesInvSbox[ state[1][2] ] << 16; + state2[0] |= (UINT32) SymCryptAesInvSbox[ state[1][3] ] << 24; + + // SubBytes/ShiftRows for col. 2 + state2[2] |= (UINT32) SymCryptAesInvSbox[ state[2][0] ]; + state2[3] |= (UINT32) SymCryptAesInvSbox[ state[2][1] ] << 8; + state2[0] |= (UINT32) SymCryptAesInvSbox[ state[2][2] ] << 16; + state2[1] |= (UINT32) SymCryptAesInvSbox[ state[2][3] ] << 24; + + // SubBytes/ShiftRows for col. 
3 + state2[3] |= (UINT32) SymCryptAesInvSbox[ state[3][0] ]; + state2[0] |= (UINT32) SymCryptAesInvSbox[ state[3][1] ] << 8; + state2[1] |= (UINT32) SymCryptAesInvSbox[ state[3][2] ] << 16; + state2[2] |= (UINT32) SymCryptAesInvSbox[ state[3][3] ] << 24; + + // AddRoundKey + *((UINT32 *) &pbPlaintext[0 ]) = *(UINT32 *) (*keyPtr)[0] ^ state2[0]; + *((UINT32 *) &pbPlaintext[4 ]) = *(UINT32 *) (*keyPtr)[1] ^ state2[1]; + *((UINT32 *) &pbPlaintext[8 ]) = *(UINT32 *) (*keyPtr)[2] ^ state2[2]; + *((UINT32 *) &pbPlaintext[12]) = *(UINT32 *) (*keyPtr)[3] ^ state2[3]; + + SymCryptWipeKnownSize( state, sizeof( state ) ); + SymCryptWipeKnownSize( state2, sizeof( state2 ) ); + + return; +} + +VOID +SYMCRYPT_CALL +SymCryptAesEcbEncryptC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + while( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptAesEncryptC( pExpandedKey, pbSrc, pbDst ); + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + } +} + +VOID +SYMCRYPT_CALL +SymCryptAesEcbDecryptC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + while( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptAesDecryptC( pExpandedKey, pbSrc, pbDst ); + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + } +} + +/* Wine hack: asm not supported yet */ + +VOID +SYMCRYPT_CALL +SymCryptAesEncryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ) +{ + SymCryptAesEncryptC( pExpandedKey, pbSrc, pbDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptAesDecryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( 
SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ) +{ + SymCryptAesDecryptC( pExpandedKey, pbSrc, pbDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptAesCbcEncryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SymCryptCbcEncrypt( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +} + +VOID +SYMCRYPT_CALL +SymCryptAesCbcDecryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SymCryptCbcDecrypt( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +} + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb64Asm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SymCryptCtrMsb64( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +} + +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_ul1( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_xmm_ssse3_asm( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ) +{ + SymCryptSha256AppendBlocks_ul1( pChain, pbData, cbData, pcbRemaining ); +} diff --git a/libs/symcrypt/lib/aes-default-bc.c b/libs/symcrypt/lib/aes-default-bc.c new file mode 100644 index 00000000000..2f38fc4aabe --- /dev/null +++ b/libs/symcrypt/lib/aes-default-bc.c @@ -0,0 +1,92 @@ +// +// aes-default-bc.c code for AES implementation +// +// Copyright (c) Microsoft Corporation. 
Licensed under the MIT license. +// + +#include "precomp.h" + +// +// The SymCrypt API allows callers to use the generic block cipher mode functions and pass +// a pointer to a structure that describes the block cipher. +// This structure contains pointers to all the optimized implementations of the various modes. +// This pulls in all the mode-specific code, which in some cases we don't want. +// +// We isolate the SymCryptAesBlockCipher structure into this separate C file so that it only gets +// pulled in when the application uses this structure. +// + +// +// The virtual table for the AES block cipher. +// +// All pointers must point to specialized functions. The general +// block cipher mode functions will call these pointers if they are non-NULL +// so if they point back to an implementation that calls the generic +// mode functions we get an infinite recursion. +// +// NOTE: the compile-time conditions in this file should track the actual implementations in +// aes-default.c. +// + +const SYMCRYPT_BLOCKCIPHER SymCryptAesBlockCipher_Fast = { + &SymCryptAesExpandKey, + &SymCryptAesEncrypt, + &SymCryptAesDecrypt, + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + &SymCryptAesEcbEncrypt, +#else + NULL, +#endif + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + &SymCryptAesEcbDecrypt, +#else + NULL, +#endif + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + &SymCryptAesCbcEncrypt, +#else + NULL, +#endif + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + &SymCryptAesCbcDecrypt, +#else + NULL, +#endif + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 + &SymCryptAesCbcMac, +#else + NULL, +#endif + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + &SymCryptAesCtrMsb64, +#else + NULL, +#endif + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + &SymCryptAesGcmEncryptPart, +#else + 
NULL, +#endif + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + &SymCryptAesGcmDecryptPart, +#else + NULL, +#endif + + SYMCRYPT_AES_BLOCK_SIZE, + sizeof( SYMCRYPT_AES_EXPANDED_KEY ), +}; + +// +// This indirection makes it easier to switch implementations in a binary without +// changing the calling code. +// +const PCSYMCRYPT_BLOCKCIPHER SymCryptAesBlockCipher = &SymCryptAesBlockCipher_Fast; diff --git a/libs/symcrypt/lib/aes-default.c b/libs/symcrypt/lib/aes-default.c new file mode 100644 index 00000000000..fce247d7f74 --- /dev/null +++ b/libs/symcrypt/lib/aes-default.c @@ -0,0 +1,872 @@ +// +// aes-default.c code for AES implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// This is the interface for the default AES implementation. +// On each platform, this is the fastest AES implementation irrespective of code size. +// It uses assembler, XMM, or any other trick. +// + + +#include "precomp.h" + +// +// Virtual table for generic functions +// This allows us to default to generic implementations for some modes without pulling in all the +// dedicated functions. +// We use this when we cannot use the optimized implementations for some reason. 
+// +const SYMCRYPT_BLOCKCIPHER SymCryptAesBlockCipherNoOpt = { + &SymCryptAesExpandKey, +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM + &SymCryptAesEncryptAsm, + &SymCryptAesDecryptAsm, +#else + &SymCryptAesEncryptC, + &SymCryptAesDecryptC, +#endif + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + + SYMCRYPT_AES_BLOCK_SIZE, + sizeof( SYMCRYPT_AES_EXPANDED_KEY ), +}; + +VOID +SYMCRYPT_CALL +SymCryptAes4Sbox( _In_reads_(4) PCBYTE pIn, _Out_writes_(4) PBYTE pOut, BOOL UseSimd ) +{ +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + if( UseSimd ) + { + SymCryptAes4SboxXmm( pIn, pOut ); + } else { + SymCryptAes4SboxC( pIn, pOut ); + } +#elif SYMCRYPT_CPU_ARM64 + if( UseSimd ) + { + SymCryptAes4SboxNeon( pIn, pOut ); + } else { + SymCryptAes4SboxC( pIn, pOut ); + } +#else + UNREFERENCED_PARAMETER( UseSimd ); + SymCryptAes4SboxC( pIn, pOut ); // never use XMM on SaveXmm arch, save/restore overhead is too large. +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesCreateDecryptionRoundKey( + _In_reads_(16) PCBYTE pEncryptionRoundKey, + _Out_writes_(16) PBYTE pDecryptionRoundKey, + BOOL UseSimd ) +{ +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + if( UseSimd ) + { + SymCryptAesCreateDecryptionRoundKeyXmm( pEncryptionRoundKey, pDecryptionRoundKey ); + } else { + SymCryptAesCreateDecryptionRoundKeyC( pEncryptionRoundKey, pDecryptionRoundKey ); + } +#elif SYMCRYPT_CPU_ARM64 + if( UseSimd ) + { + SymCryptAesCreateDecryptionRoundKeyNeon( pEncryptionRoundKey, pDecryptionRoundKey ); + } else { + SymCryptAesCreateDecryptionRoundKeyC( pEncryptionRoundKey, pDecryptionRoundKey ); + } +#else + UNREFERENCED_PARAMETER( UseSimd ); + SymCryptAesCreateDecryptionRoundKeyC( pEncryptionRoundKey, pDecryptionRoundKey ); // never use XMM on SaveXmm arch, save/restore overhead is too large. 
+#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesEncrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(SYMCRYPT_AES_BLOCK_SIZE) PCBYTE pbSrc, + _Out_writes_(SYMCRYPT_AES_BLOCK_SIZE) PBYTE pbDst ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + SymCryptAesEncryptXmm( pExpandedKey, pbSrc, pbDst ); + } else { + SymCryptAesEncryptAsm( pExpandedKey, pbSrc, pbDst ); + } +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesEncryptXmm( pExpandedKey, pbSrc, pbDst ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptAesEncryptAsm( pExpandedKey, pbSrc, pbDst ); + } +#elif SYMCRYPT_CPU_ARM + SymCryptAesEncryptAsm( pExpandedKey, pbSrc, pbDst ); +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptAesEncryptNeon( pExpandedKey, pbSrc, pbDst ); + } else { + SymCryptAesEncryptC( pExpandedKey, pbSrc, pbDst ); + } +#else + SymCryptAesEncryptC( pExpandedKey, pbSrc, pbDst ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesDecrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(SYMCRYPT_AES_BLOCK_SIZE) PCBYTE pbSrc, + _Out_writes_(SYMCRYPT_AES_BLOCK_SIZE) PBYTE pbDst ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + SymCryptAesDecryptXmm( pExpandedKey, pbSrc, pbDst ); + } else { + SymCryptAesDecryptAsm( pExpandedKey, pbSrc, pbDst ); + } +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesDecryptXmm( pExpandedKey, pbSrc, pbDst ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptAesDecryptAsm( pExpandedKey, pbSrc, pbDst ); + } +#elif 
SYMCRYPT_CPU_ARM + SymCryptAesDecryptAsm( pExpandedKey, pbSrc, pbDst ); +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptAesDecryptNeon( pExpandedKey, pbSrc, pbDst ); + } else { + SymCryptAesDecryptC( pExpandedKey, pbSrc, pbDst ); + } +#else + SymCryptAesDecryptC( pExpandedKey, pbSrc, pbDst ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesCbcEncrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + SymCryptAesCbcEncryptXmm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } else { + SymCryptAesCbcEncryptAsm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesCbcEncryptXmm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptAesCbcEncryptAsm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } +#elif SYMCRYPT_CPU_ARM + SymCryptAesCbcEncryptAsm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptAesCbcEncryptNeon( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } else { + SymCryptCbcEncrypt( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } +#else + SymCryptCbcEncrypt( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesCbcDecrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( 
SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + SymCryptAesCbcDecryptXmm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } else { + SymCryptAesCbcDecryptAsm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesCbcDecryptXmm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptAesCbcDecryptAsm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } +#elif SYMCRYPT_CPU_ARM + SymCryptAesCbcDecryptAsm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptAesCbcDecryptNeon( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } else { + SymCryptCbcDecrypt( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } +#else + SymCryptCbcDecrypt( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesEcbEncrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + SymCryptAesEcbEncryptXmm( pExpandedKey, pbSrc, pbDst, cbData ); + } else { + SymCryptAesEcbEncryptAsm( pExpandedKey, pbSrc, pbDst, cbData ); + } +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( 
&SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesEcbEncryptXmm( pExpandedKey, pbSrc, pbDst, cbData ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptAesEcbEncryptAsm( pExpandedKey, pbSrc, pbDst, cbData ); + } +#elif SYMCRYPT_CPU_ARM + SymCryptAesEcbEncryptAsm( pExpandedKey, pbSrc, pbDst, cbData ); +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptAesEcbEncryptNeon( pExpandedKey, pbSrc, pbDst, cbData ); + } else { + SymCryptAesEcbEncryptC( pExpandedKey, pbSrc, pbDst, cbData ); + } +#else + SymCryptAesEcbEncryptC( pExpandedKey, pbSrc, pbDst, cbData ); +#endif +} + +// +// NOTE: There is no reason that SymCryptAesEcbDecrypt could not have unrolled versions similar to +// SymCryptAesEcbEncrypt if a real use case requiring large scale Ecb decryption is found. +// For now just decrypt 1 block at a time to reduce code size. +// +VOID +SYMCRYPT_CALL +SymCryptAesEcbDecrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + while( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptAesDecrypt( pExpandedKey, pbSrc, pbDst ); + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + } +} + +VOID +SYMCRYPT_CALL +SymCryptAesCbcMac( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + SymCryptAesCbcMacXmm( pExpandedKey, pbChainingValue, pbData, cbData ); + } else { + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptCbcMac( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbData, cbData ); + } +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( 
SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesCbcMacXmm( pExpandedKey, pbChainingValue, pbData, cbData ); + SymCryptRestoreXmm( &SaveData ); + } else { + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptCbcMac( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbData, cbData ); + } +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptAesCbcMacNeon( pExpandedKey, pbChainingValue, pbData, cbData ); + } else { + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptCbcMac( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbData, cbData ); + } +#else + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptCbcMac( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbData, cbData ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb32( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + SymCryptAesCtrMsb32Xmm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } else { + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); // keep Prefast happy + SymCryptCtrMsb32( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } + +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesCtrMsb32Xmm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + SymCryptRestoreXmm( 
&SaveData ); + } else { + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); // keep Prefast happy + SymCryptCtrMsb32( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } + +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptAesCtrMsb32Neon( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } else { + SymCryptCtrMsb32( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } + +#else + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); // keep Prefast happy + SymCryptCtrMsb32( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb64( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + SymCryptAesCtrMsb64Xmm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } else { + SymCryptAesCtrMsb64Asm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } + +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesCtrMsb64Xmm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptAesCtrMsb64Asm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } + +#elif SYMCRYPT_CPU_ARM + SymCryptAesCtrMsb64Asm( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptAesCtrMsb64Neon( pExpandedKey, 
pbChainingValue, pbSrc, pbDst, cbData ); + } else { + SymCryptCtrMsb64( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + } + +#else + SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); // keep Prefast happy + SymCryptCtrMsb64( &SymCryptAesBlockCipherNoOpt, pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesGcmEncryptPartOnePass( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SIZE_T bytesToProcess; +#if SYMCRYPT_CPU_AMD64 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; +#endif + + // + // We have entered the encrypt phase, the AAD has been padded to be a multiple of block size + // We know that the bytes still to use in the key stream buffer and the bytes left to fill the + // macBlock will be the same in the context of this function + // + SYMCRYPT_ASSERT( (pState->cbData & SYMCRYPT_GCM_BLOCK_MOD_MASK) == pState->bytesInMacBlock ); + + // + // We update pState->cbData once before we modify cbData. 
+ // pState->cbData is not used in the rest of this function + // + SYMCRYPT_ASSERT( pState->cbData + cbData <= SYMCRYPT_GCM_MAX_DATA_SIZE ); + pState->cbData += cbData; + + if( pState->bytesInMacBlock > 0 ) + { + bytesToProcess = SYMCRYPT_MIN( cbData, SYMCRYPT_GCM_BLOCK_SIZE - pState->bytesInMacBlock ); + SymCryptXorBytes( + pbSrc, + &pState->keystreamBlock[pState->bytesInMacBlock], + &pState->macBlock[pState->bytesInMacBlock], + bytesToProcess ); + memcpy( pbDst, &pState->macBlock[pState->bytesInMacBlock], bytesToProcess ); + pbSrc += bytesToProcess; + pbDst += bytesToProcess; + cbData -= bytesToProcess; + pState->bytesInMacBlock += bytesToProcess; + + if( pState->bytesInMacBlock == SYMCRYPT_GCM_BLOCK_SIZE ) + { + SymCryptGHashAppendData( &pState->pKey->ghashKey, + &pState->ghashState, + &pState->macBlock[0], + SYMCRYPT_GCM_BLOCK_SIZE ); + pState->bytesInMacBlock = 0; + } + + // + // If there are bytes left in the key stream buffer, then cbData == 0 and we're done. + // If we used up all the bytes, then we are fine, no need to compute the next key stream block + // + } + + if( cbData >= SYMCRYPT_GCM_BLOCK_SIZE ) + { + bytesToProcess = cbData & SYMCRYPT_GCM_BLOCK_ROUND_MASK; + + // + // We use a Gcm function that increments the CTR by 64 bits, rather than the 32 bits that GCM requires. + // As we only support 12-byte nonces, the 32-bit counter never overflows, and we can safely use + // the 64-bit incrementing primitive. + // If we ever support other nonce sizes this is going to be a big problem. + // You can't fake a 32-bit counter using a 64-bit counter function without side-channels that expose + // information about the current counter value. + // With other nonce sizes the actual counter value itself is not public, so we can't expose that. + // We can do two things: + // - create SymCryptAesGcmEncryptXXX32 + // - Accept that we leak information about the counter value; after all it is not treated as a + // secret when the nonce is 12 bytes. 
+ // + SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE ); + +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_VAES_256_CODE ) && + (bytesToProcess >= GCM_YMM_MINBLOCKS * SYMCRYPT_GCM_BLOCK_SIZE) && + SymCryptSaveYmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesGcmEncryptStitchedYmm_2048( + &pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + &pState->pKey->ghashKey.table[0], + &pState->ghashState, + pbSrc, + pbDst, + bytesToProcess ); + + SymCryptRestoreYmm( &SaveData ); + } else { + SymCryptAesGcmEncryptStitchedXmm( + &pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + &pState->pKey->ghashKey.table[0], + &pState->ghashState, + pbSrc, + pbDst, + bytesToProcess ); + } + +#elif SYMCRYPT_CPU_X86 + SymCryptAesGcmEncryptStitchedXmm( + &pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + (PSYMCRYPT_GF128_ELEMENT)&pState->pKey->ghashKey.tableSpace[pState->pKey->ghashKey.tableOffset], + &pState->ghashState, + pbSrc, + pbDst, + bytesToProcess ); + +#elif SYMCRYPT_CPU_ARM64 + SymCryptAesGcmEncryptStitchedNeon( + &pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + &pState->pKey->ghashKey.table[0], + &pState->ghashState, + pbSrc, + pbDst, + bytesToProcess ); + +#else + SymCryptAesCtrMsb32(&pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + pbSrc, + pbDst, + bytesToProcess ); + // + // We break the read-once/write once rule here by reading the pbDst data back. + // In this particular situation this is safe, and avoiding it is expensive as it + // requires an extra copy and an extra memory buffer. + // The first write exposes the GCM key stream, independent of the underlying data that + // we are processing. From an attacking point of view we can think of this as literally + // handing over the key stream.
So encryption consists of two steps: + // - hand over the key stream + // - MAC some ciphertext + // In this view (which has equivalent security properties to GCM) it obviously doesn't + // matter that we read pbDst back. Like the stitched paths, only bytesToProcess (whole blocks) is handled here. + // + SymCryptGHashAppendData(&pState->pKey->ghashKey, + &pState->ghashState, + pbDst, + bytesToProcess ); + +#endif + + pbSrc += bytesToProcess; + pbDst += bytesToProcess; + cbData -= bytesToProcess; + } + + if( cbData > 0 ) + { + SymCryptWipeKnownSize( &pState->keystreamBlock[0], SYMCRYPT_GCM_BLOCK_SIZE ); + + SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE ); + SymCryptAesCtrMsb32(&pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + &pState->keystreamBlock[0], + &pState->keystreamBlock[0], + SYMCRYPT_GCM_BLOCK_SIZE ); + + SymCryptXorBytes( &pState->keystreamBlock[0], pbSrc, &pState->macBlock[0], cbData ); + memcpy( pbDst, &pState->macBlock[0], cbData ); + pState->bytesInMacBlock = cbData; + + // + // pState->cbData contains the data length after this call already, so it knows how many + // bytes are left in the keystream block + // + } +} + +VOID +SYMCRYPT_CALL +SymCryptAesGcmDecryptPartOnePass( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SIZE_T bytesToProcess; +#if SYMCRYPT_CPU_AMD64 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; +#endif + + // + // We have entered the decrypt phase, the AAD has been padded to be a multiple of block size + // We know that the bytes still to use in the key stream buffer and the bytes left to fill the + // macBlock will be the same in the context of this function + // + SYMCRYPT_ASSERT( (pState->cbData & SYMCRYPT_GCM_BLOCK_MOD_MASK) == pState->bytesInMacBlock ); + + // + // We update pState->cbData once before we modify cbData.
+ // pState->cbData is not used in the rest of this function + // + SYMCRYPT_ASSERT( pState->cbData + cbData <= SYMCRYPT_GCM_MAX_DATA_SIZE ); + pState->cbData += cbData; + + if( pState->bytesInMacBlock > 0 ) + { + bytesToProcess = SYMCRYPT_MIN( cbData, SYMCRYPT_GCM_BLOCK_SIZE - pState->bytesInMacBlock ); + memcpy( &pState->macBlock[pState->bytesInMacBlock], pbSrc, bytesToProcess ); + SymCryptXorBytes( + &pState->keystreamBlock[pState->bytesInMacBlock], + &pState->macBlock[pState->bytesInMacBlock], + pbDst, + bytesToProcess ); + + pbSrc += bytesToProcess; + pbDst += bytesToProcess; + cbData -= bytesToProcess; + pState->bytesInMacBlock += bytesToProcess; + + if( pState->bytesInMacBlock == SYMCRYPT_GCM_BLOCK_SIZE ) + { + SymCryptGHashAppendData( &pState->pKey->ghashKey, + &pState->ghashState, + &pState->macBlock[0], + SYMCRYPT_GCM_BLOCK_SIZE ); + pState->bytesInMacBlock = 0; + } + + // + // If there are bytes left in the key stream buffer, then cbData == 0 and we're done. + // If we used up all the bytes, then we are fine, no need to compute the next key stream block + // + } + + if( cbData >= SYMCRYPT_GCM_BLOCK_SIZE ) + { + bytesToProcess = cbData & SYMCRYPT_GCM_BLOCK_ROUND_MASK; + + // + // We use a Gcm function that increments the CTR by 64 bits, rather than the 32 bits that GCM requires. + // As we only support 12-byte nonces, the 32-bit counter never overflows, and we can safely use + // the 64-bit incrementing primitive. + // If we ever support other nonce sizes this is going to be a big problem. + // You can't fake a 32-bit counter using a 64-bit counter function without side-channels that expose + // information about the current counter value. + // With other nonce sizes the actual counter value itself is not public, so we can't expose that. + // We can do two things: + // - create SymCryptAesGcmDecryptXXX32 + // - Accept that we leak information about the counter value; after all it is not treated as a + // secret when the nonce is 12 bytes. 
+ // + SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE ); + +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_VAES_256_CODE ) && + (bytesToProcess >= GCM_YMM_MINBLOCKS * SYMCRYPT_GCM_BLOCK_SIZE) && + SymCryptSaveYmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesGcmDecryptStitchedYmm_2048( + &pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + &pState->pKey->ghashKey.table[0], + &pState->ghashState, + pbSrc, + pbDst, + bytesToProcess ); + + SymCryptRestoreYmm( &SaveData ); + } else { + SymCryptAesGcmDecryptStitchedXmm( + &pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + &pState->pKey->ghashKey.table[0], + &pState->ghashState, + pbSrc, + pbDst, + bytesToProcess ); + } + +#elif SYMCRYPT_CPU_X86 + SymCryptAesGcmDecryptStitchedXmm( + &pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + (PSYMCRYPT_GF128_ELEMENT)&pState->pKey->ghashKey.tableSpace[pState->pKey->ghashKey.tableOffset], + &pState->ghashState, + pbSrc, + pbDst, + bytesToProcess ); + +#elif SYMCRYPT_CPU_ARM64 + SymCryptAesGcmDecryptStitchedNeon( + &pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + &pState->pKey->ghashKey.table[0], + &pState->ghashState, + pbSrc, + pbDst, + bytesToProcess ); + +#else + SymCryptGHashAppendData(&pState->pKey->ghashKey, + &pState->ghashState, + pbSrc, + bytesToProcess ); + // + // Do the actual decryption (whole blocks only, the same bytesToProcess bytes that were just MAC'd) + // This violates the read-once rule, but it is safe for the same reasons as above + // in the encryption case.
+ // + SymCryptAesCtrMsb32(&pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + pbSrc, + pbDst, + bytesToProcess ); + +#endif + pbSrc += bytesToProcess; + pbDst += bytesToProcess; + cbData -= bytesToProcess; + } + + if( cbData > 0 ) + { + SymCryptWipeKnownSize( &pState->keystreamBlock[0], SYMCRYPT_GCM_BLOCK_SIZE ); + + SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE ); + SymCryptAesCtrMsb32(&pState->pKey->blockcipherKey.aes, + &pState->counterBlock[0], + &pState->keystreamBlock[0], + &pState->keystreamBlock[0], + SYMCRYPT_GCM_BLOCK_SIZE ); + + memcpy( &pState->macBlock[0], pbSrc, cbData ); + SymCryptXorBytes( + &pState->keystreamBlock[0], + &pState->macBlock[0], + pbDst, + cbData ); + + pState->bytesInMacBlock = cbData; + + // + // pState->cbData contains the data length after this call already, so it knows how many + // bytes are left in the keystream block + // + } +} + +VOID +SYMCRYPT_CALL +SymCryptAesGcmEncryptPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_PCLMULQDQ_CODE ) ) + { + SymCryptAesGcmEncryptPartOnePass( pState, pbSrc, pbDst, cbData ); + } else { + SymCryptGcmEncryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } + +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_PCLMULQDQ_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesGcmEncryptPartOnePass( pState, pbSrc, pbDst, cbData ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptGcmEncryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } + +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES | SYMCRYPT_CPU_FEATURE_NEON_PMULL ) ) + { + SymCryptAesGcmEncryptPartOnePass( pState, pbSrc, pbDst, cbData ); + } else { +
SymCryptGcmEncryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } + +#else + SymCryptGcmEncryptPartTwoPass( pState, pbSrc, pbDst, cbData ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptAesGcmDecryptPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_PCLMULQDQ_CODE ) ) + { + SymCryptAesGcmDecryptPartOnePass( pState, pbSrc, pbDst, cbData ); + } else { + SymCryptGcmDecryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } + +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_PCLMULQDQ_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptAesGcmDecryptPartOnePass( pState, pbSrc, pbDst, cbData ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptGcmDecryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } + +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES | SYMCRYPT_CPU_FEATURE_NEON_PMULL ) ) + { + SymCryptAesGcmDecryptPartOnePass( pState, pbSrc, pbDst, cbData ); + } else { + SymCryptGcmDecryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } + +#else + SymCryptGcmDecryptPartTwoPass( pState, pbSrc, pbDst, cbData ); +#endif +} diff --git a/libs/symcrypt/lib/aes-key.c b/libs/symcrypt/lib/aes-key.c new file mode 100644 index 00000000000..e584403914a --- /dev/null +++ b/libs/symcrypt/lib/aes-key.c @@ -0,0 +1,437 @@ +// +// aes.c code for AES implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// The actual encryption and decryption routines here are not nearly as fast as the +// assembler ones. They are used on platforms that don't have assembler implementations +// and for various testing purposes. 
+// + // This code derives from the original fast AES code that Niels Ferguson wrote +// for BitLocker in Windows Vista. +// The C code is derived from the AES that was already in the RSA32 library, +// the assembler code was created new at that time. +// + + +#include "precomp.h" + + +/////////////////////////////////////////////////////////////////////////////// +// Key expansion uses two functions, a 4-byte S-box lookup and one +// to create a decryption round key from an encryption round key. +// These are the C implementations of these functions +// + + +static const BYTE g_SymCryptAesRoundConstant[11] = // read-only; indexed by the 1-based key-expansion loop counter, entry 0 is unused +{ + 0, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, +}; + +SYMCRYPT_NOINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesExpandKeyInternal( + _Out_ PSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey, + BOOLEAN fCreateDecryptionKeys ) +{ + UINT32 nRounds; + BYTE * p; + BYTE * q; + UINT32 i; + UINT32 t; + + BOOL UseSimd = FALSE; + SYMCRYPT_ERROR status = SYMCRYPT_NO_ERROR; + +#if SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + if( SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + UseSimd = TRUE; + } + } +#elif SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) + { + UseSimd = TRUE; + } +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + UseSimd = TRUE; + } +#endif + + SYMCRYPT_SET_MAGIC( pExpandedKey ); + + // + // Separate code for each key size, this is significantly faster. + // We have a number of applications that do frequent key expansions.
+ // + switch( cbKey ) + { + case 16: + nRounds = 10; + pExpandedKey->lastEncRoundKey = &pExpandedKey->RoundKey[nRounds]; + pExpandedKey->lastDecRoundKey = &pExpandedKey->RoundKey[2*nRounds]; + + memcpy( &pExpandedKey->RoundKey[0], pbKey, 16 ); + + p = (BYTE *)&pExpandedKey->RoundKey[1]; + + for( i=1; i<=nRounds; i++ ) + { + SymCryptAes4Sbox( &p[-4], p, UseSimd ); + t = ROR32(SYMCRYPT_LOAD_LSBFIRST32(p), 8) ^ SYMCRYPT_LOAD_LSBFIRST32(p - 16) ^ g_SymCryptAesRoundConstant[i]; + SYMCRYPT_STORE_LSBFIRST32( p, t ); // this is a macro that re-evaluates its arguments + + *(UINT32 *)(p+4) = *(UINT32 *) p ^ *(UINT32 *)(p - 12); + *(UINT32 *)(p+8) = *(UINT32 *)(p+4) ^ *(UINT32 *)(p - 8); + *(UINT32 *)(p+12) = *(UINT32 *)(p+8) ^ *(UINT32 *)(p - 4); + + p += 16; + } + + break; + + case 24: + nRounds = 12; + pExpandedKey->lastEncRoundKey = &pExpandedKey->RoundKey[nRounds]; + pExpandedKey->lastDecRoundKey = &pExpandedKey->RoundKey[2*nRounds]; + + memcpy( &pExpandedKey->RoundKey[0], pbKey, 24 ); + + p = (BYTE *)&pExpandedKey->RoundKey[0] + 24; + + // + // We have 12 rounds, 13 round keys, and 13*16 = 208 bytes of encryption key to generate. + // We have 24 already, so we need 184 more. + // Each iteration produces 24 bytes, so we need to loop 8 times. 
+ // + for( i=1; i<=8; i++ ) + { + SymCryptAes4Sbox( &p[-4], p, UseSimd ); + t = ROR32(SYMCRYPT_LOAD_LSBFIRST32(p), 8) ^ SYMCRYPT_LOAD_LSBFIRST32(p - 24) ^ g_SymCryptAesRoundConstant[i]; + SYMCRYPT_STORE_LSBFIRST32( p, t ); + + *(UINT32 *)(p+4) = *(UINT32 *) p ^ *(UINT32 *)(p - 20); + *(UINT32 *)(p+8) = *(UINT32 *)(p+ 4) ^ *(UINT32 *)(p - 16); + *(UINT32 *)(p+12) = *(UINT32 *)(p+ 8) ^ *(UINT32 *)(p - 12); + *(UINT32 *)(p+16) = *(UINT32 *)(p+12) ^ *(UINT32 *)(p - 8); + *(UINT32 *)(p+20) = *(UINT32 *)(p+16) ^ *(UINT32 *)(p - 4); + + p += 24; + } + + break; + + case 32: + nRounds = 14; + pExpandedKey->lastEncRoundKey = &pExpandedKey->RoundKey[nRounds]; + pExpandedKey->lastDecRoundKey = &pExpandedKey->RoundKey[2*nRounds]; + + memcpy( &pExpandedKey->RoundKey[0], pbKey, 32 ); + + p = (BYTE *)&pExpandedKey->RoundKey[0] + 32; + + // + // We have 14 rounds, 15 round keys, and 15*16 = 240 bytes of encryption key to generate. + // We have 32 already, so we need 208 more. + // Each iteration produces 32 bytes, so we need to loop 6.5 times. 
+ // + for( i=1; i<=6; i++ ) + { + SymCryptAes4Sbox( &p[-4], p, UseSimd ); + t = ROR32(SYMCRYPT_LOAD_LSBFIRST32(p), 8) ^ SYMCRYPT_LOAD_LSBFIRST32(p - 32) ^ g_SymCryptAesRoundConstant[i]; + SYMCRYPT_STORE_LSBFIRST32( p, t ); + + *(UINT32 *)(p+4) = *(UINT32 *) p ^ *(UINT32 *)(p - 28); + *(UINT32 *)(p+8) = *(UINT32 *)(p + 4) ^ *(UINT32 *)(p - 24); + *(UINT32 *)(p+12) = *(UINT32 *)(p + 8) ^ *(UINT32 *)(p - 20); + + SymCryptAes4Sbox( &p[12], &p[16], UseSimd ); + *(UINT32 *)(p+16) = *(UINT32 *)(p + 16) ^ *(UINT32 *)(p - 16); + + *(UINT32 *)(p+20) = *(UINT32 *)(p + 16) ^ *(UINT32 *)(p - 12); + *(UINT32 *)(p+24) = *(UINT32 *)(p + 20) ^ *(UINT32 *)(p - 8); + *(UINT32 *)(p+28) = *(UINT32 *)(p + 24) ^ *(UINT32 *)(p - 4); + + p += 32; + } + + // We looped 6 times, so here is the half-loop + + SymCryptAes4Sbox( &p[-4], p, UseSimd ); + t = ROR32(SYMCRYPT_LOAD_LSBFIRST32(p), 8) ^ SYMCRYPT_LOAD_LSBFIRST32(p - 32) ^ g_SymCryptAesRoundConstant[i]; + SYMCRYPT_STORE_LSBFIRST32( p, t ); + + *(UINT32 *)(p+4) = *(UINT32 *) p ^ *(UINT32 *)(p - 28); + *(UINT32 *)(p+8) = *(UINT32 *)(p + 4) ^ *(UINT32 *)(p - 24); + *(UINT32 *)(p+12) = *(UINT32 *)(p + 8) ^ *(UINT32 *)(p - 20); + + break; + + default: + status = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + + if( fCreateDecryptionKeys ) + { + p = &pExpandedKey->RoundKey[0][0][0]; + q = (PBYTE)(pExpandedKey->lastDecRoundKey); + + // The first encryption round key is the last decryption round key + memcpy( q, p, SYMCRYPT_AES_BLOCK_SIZE ); + p += 16; + q -= 16; + + while( p < (PBYTE) pExpandedKey->lastEncRoundKey ) + { + SymCryptAesCreateDecryptionRoundKey( p, q, UseSimd ); + q -= 16; + p += 16; + } + } + +cleanup: + +#if SYMCRYPT_CPU_X86 + if( UseSimd ) + { + SymCryptRestoreXmm( &SaveData ); + } +#endif + + return status; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesExpandKey( + _Out_ PSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ) + +{ + return SymCryptAesExpandKeyInternal( pExpandedKey, pbKey, 
cbKey, TRUE ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesExpandKeyEncryptOnly( + _Out_ PSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ) +{ + return SymCryptAesExpandKeyInternal( pExpandedKey, pbKey, cbKey, FALSE ); +} + +VOID +SYMCRYPT_CALL +SymCryptAesKeyCopy( _In_ PCSYMCRYPT_AES_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_AES_EXPANDED_KEY pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + + *pDst = *pSrc; + pDst->lastEncRoundKey = &pDst->RoundKey[0] + (pSrc->lastEncRoundKey - &pSrc->RoundKey[0]); + pDst->lastDecRoundKey = &pDst->RoundKey[0] + (pSrc->lastDecRoundKey - &pSrc->RoundKey[0]); + + SYMCRYPT_SET_MAGIC( pDst ); +} + +// +// Self test code +// + + +const BYTE SymCryptAesNistTestVector128Ciphertext[16] = { + 0x69, 0xc4, 0xe0, 0xd8, 0x6a, 0x7b, 0x04, 0x30, + 0xd8, 0xcd, 0xb7, 0x80, 0x70, 0xb4, 0xc5, 0x5a, +}; + + + +/**************************************************************** + * OLD CODE + * + * Old code to generate the AES tables dynamically. + * Kept for future reference. + * + + +// +// Prototype; on some platforms this function is in assembler. +// +VOID +SYMCRYPT_CALL +SymCryptAesCreateRotatedTables( BYTE MatrixMult[4][256][4] ); + +VOID +SYMCRYPT_CALL +SymCryptAesCreateRotatedTables( _Inout_ BYTE MatrixMult[4][256][4] ) +{ + int i,j,k; + + // + // We do this byte-by-byte, which is easiest. + // It would be faster to use UINT32 operations, + // but that is endian-specific, and therefore platform-specific. + // Endian-agnostic UINT32-based code would be a lot more complicated. + // All this is extremely easy to do in assembler, which we do on those + // platforms that have assembler implementations. + // + for( j=1; j<4; j++ ) { + for( i=0; i<256; i++ ) { + for( k=0; k<4; k++ ) { + MatrixMult[j][i][k] = MatrixMult[0][i][(k-j)&3]; + } + } + } +} + + + +// +// SymCryptAesInitMatrixMultiplyTable +// +// Initialize a matrix multiplication table. 
+// Each matrix multiplication table consists of 4 tables of 256 entries of 4 bytes each. +// The four tables are rotated copies of each other. +// This function generates the first of those four tables from the init +// value. +// +// After this call: +// At index i the table contains the four bytes +// i * init[0], i * init[1], i * init[2], i * init[3] +// where multiplication is in GF(2^8). +// +// We do not do a GF(2^8) multiplication for each entry, but rather use the +// relationship (a xor b) * init[.] = a * init[.] xor b * init[.] +// And only compute i*init[.] for i = 1,2,4,8,...,128. This can be done +// using repeated multiplication by x in the finite field. +// +// It is safe to call this function on two separate threads for the same table. +// All invocations will write the same data to the table, and within a thread each entry is written +// before it is read. Doing parallel initializations of the same table can be very inefficient +// as multiple cores will be fighting over the cache lines, but the result will be correct. +// We use this property to initialize the tables lazily. +// +static +VOID +SYMCRYPT_CALL +SymCryptAesInitMatrixMultiplyTable( _Out_ SYMCRYPT_ALIGN BYTE MatrixMult[256][4], + _In_ SYMCRYPT_ALIGN BYTE init[4] + ) +{ + int i,j; + SYMCRYPT_ALIGN BYTE initCopy[4]; + UINT32 initCopyAsUint32; + + // + // We copy the init value so that we can modify it without worrying about multi-threading + // issues. + // + *(UINT32 *)initCopy = *(UINT32 *)init; + + *(UINT32 *)MatrixMult[0] = 0; + for( i=1; i<256; i<<=1 ) + { + initCopyAsUint32 = *(UINT32 *)initCopy; + for( j=0; j<i; j++ ) + { + *(UINT32 *)MatrixMult[i+j] = *(UINT32 *)MatrixMult[j] ^ initCopyAsUint32; + } + for( j=0; j<4; j++ ) + { + initCopy[j] = MULT_BY_X( initCopy[j] ); + } + } +} + + +// +// SymCryptAesInitialize +// +// Initialize the static tables for the AES implementation. +// This function is called by the key expansion function if it finds the +// tables not initialized.
+// + // This leads to an interesting case where multiple threads running on multiple +// CPUs run this initialization code at the same time. +// This code is carefully structured to allow that. When global data is written it is +// always with the final value, and we never read uninitialized global data. +// Thus, even if two CPUs run this code at the same time, they will both initialize each +// memory location to the same correct value and the end result will be correct. +// (Performance will suffer due to the fact that cache lines will be bounced back and forth +// between the two CPUs, but that is not a significant concern as this code is used only once.) +// +// At the end of the initialization the flag is set to indicate that further +// key expansion invocations do not need to re-run the initialization. +// We use memory barriers to keep this multi-thread safe. +// +static +VOID +SYMCRYPT_CALL +SymCryptAesInitialize(void) +{ + int i,j; + BYTE S; + BYTE Stimes2; + + // + // We force alignment of these arrays as we sometimes treat them as a UINT32 + // + SYMCRYPT_ALIGN BYTE InvMatrixEntry[4] = {0xe, 0x9, 0xd, 0xb}; + SYMCRYPT_ALIGN BYTE MatrixEntry[4] = {2, 1, 1, 3}; + SYMCRYPT_ALIGN BYTE MatrixScratch[256][4]; + + // Generate the forward MDS multiplication table in the scratch space + SymCryptAesInitMatrixMultiplyTable( MatrixScratch, MatrixEntry ); + + // Initialize first table of SymCryptAesInvMatrixMult + SymCryptAesInitMatrixMultiplyTable( SymCryptAesInvMatrixMult[0], InvMatrixEntry ); + + // + // Build the InvSbox table and the first table of SymCryptAesSboxMatrixMult and + // SymCryptAesInvSboxMatrixMult + // + for( i=0; i<256; i++ ) { + S = SymCryptAesSbox[i]; + SymCryptAesInvSbox[S] = (BYTE) i; + *(UINT32 *)SymCryptAesSboxMatrixMult[0][i] = *(UINT32 *)MatrixScratch[S]; + *(UINT32 *)SymCryptAesInvSboxMatrixMult[0][S] = *(UINT32 *)SymCryptAesInvMatrixMult[0][i]; + } + + // + // Now we generate the byte rotations of the tables + // +
SymCryptAesCreateRotatedTables( SymCryptAesSboxMatrixMult ); + SymCryptAesCreateRotatedTables( SymCryptAesInvSboxMatrixMult ); + SymCryptAesCreateRotatedTables( SymCryptAesInvMatrixMult ); + + // + // This is a memory barrier. It ensures that all the memory writes we do before the barrier + // are globally visible to other CPUs before the memory writes we do after the fence. + // In this particular case, it ensures that every CPU sees the completed tables before + // it sees the flag as set. + // + MemoryBarrier(); + + // + // Set the flag to signal that the tables are initialized. + // + SymCryptAesTablesInitialized = TRUE; +} + + +*/ diff --git a/libs/symcrypt/lib/aes-neon.c b/libs/symcrypt/lib/aes-neon.c new file mode 100644 index 00000000000..3c0d3fb1817 --- /dev/null +++ b/libs/symcrypt/lib/aes-neon.c @@ -0,0 +1,1889 @@ +// +// aes-neon.c code for AES implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// All NEON-based code for AES operations +// + +#include "precomp.h" + +#if SYMCRYPT_CPU_ARM64 + +#pragma clang attribute push (__attribute__((target("aes"))), apply_to=function) + +#define vzeroq() vdupq_n_u64(0) + + +VOID +SYMCRYPT_CALL +SymCryptAes4SboxNeon( _In_reads_(4) PCBYTE pIn, _Out_writes_(4) PBYTE pOut ) +{ + /* + __m128i x; + + x = _mm_set1_epi32( *(int *) pIn ); + + x = _mm_aeskeygenassist_si128( x, 0 ); + + *(unsigned *) pOut = x.m128i_u32[0]; + */ + __n128 x; + + // + // There is no pure S-box lookup instruction, but the AESE instruction + // does a ShiftRow followed by a SubBytes. + // If we duplicate the input value to all 4 lanes, then the ShiftRow does nothing + // and the SubBytes will do the S-box lookup. 
+ // + x = vdupq_n_u32( *(unsigned int *) pIn ); + x = vaeseq_u8( x, vzeroq() ); + vst1q_lane_s32( pOut, x, 0 ); + //*(unsigned int *) pOut = x.n128_u32[0]; +} + + +VOID +SYMCRYPT_CALL +SymCryptAesCreateDecryptionRoundKeyNeon( + _In_reads_(16) PCBYTE pEncryptionRoundKey, + _Out_writes_(16) PBYTE pDecryptionRoundKey ) +{ + *(__n128 *) pDecryptionRoundKey = vaesimcq_u8( *(__n128 *)pEncryptionRoundKey ); +} + +// +// When doing a full round of AES encryption, make sure to give compiler opportunity to schedule dependent +// aese/aesmc pairs to enable instruction fusion in many arm64 CPUs +// +#define AESE_AESMC( c, rk ) \ +{ \ + c = vaeseq_u8( c, rk ); \ + c = vaesmcq_u8( c ); \ +}; + +// +// When doing a full round of AES decryption, make sure to give compiler opportunity to schedule dependent +// aesd/aesimc pairs to enable instruction fusion in many arm64 CPUs +// +#define AESD_AESIMC( c, rk ) \ +{ \ + c = vaesdq_u8( c, rk ); \ + c = vaesimcq_u8( c ); \ +}; + +// +// Using a loop with AESE_AESMC and AESD_AESIMC, the compiler can still prematurely rearrange the loop and +// lose opportunity for scheduling adjacent pairs. +// Instead, explicitly unroll the AES rounds with this macro. +// Takes the name of first_round, full_round, and final_round macros, and uses them to construct block to +// handle AES (128|192|256) for either encrypt or decrypt. For now assume only need at most 8 state +// variables in the macros. +// Assumes roundKey, keyPtr, and keyLimit are defined in calling context. 
+// +#define UNROLL_AES_ROUNDS_FIRST( first_round, full_round, final_round, c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + /* Do 9 full rounds (AES-128|AES-192|AES-256) */ \ + roundKey = *keyPtr++; \ + first_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ +\ + if ( keyPtr < keyLimit ) \ + { \ + /* Do 2 more full rounds (AES-192|AES-256) */ \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ +\ + if ( keyPtr < keyLimit ) \ + { \ + /* Do 2 more full rounds (AES-256) */ \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + full_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + } \ + } \ +\ + /* Do final round (AES-128|AES-192|AES-256) */ \ + final_round( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +}; + +// Only AES_ENCRYPT_1_CHAIN needs to specify the first round differently from the full round +#define UNROLL_AES_ROUNDS( full_round, final_round, c0, c1, c2, c3, c4, c5, c6, c7 ) \ + UNROLL_AES_ROUNDS_FIRST( full_round, full_round, final_round, c0, c1, c2, c3, c4, c5, c6, c7 ) + +#define AES_ENCRYPT_ROUND_1( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + AESE_AESMC( c0, roundKey ) \ +}; +#define AES_ENCRYPT_FINAL_1( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + c0 = vaeseq_u8( c0, roundKey ); \ + roundKey = *keyPtr; \ + c0 = veorq_u8( c0, 
roundKey ); \ +}; + +#define AES_ENCRYPT_1( pExpandedKey, c0 ) \ +{ \ + const __n128 *keyPtr; \ + const __n128 *keyLimit; \ + __n128 roundKey; \ +\ + keyPtr = (const __n128 *)&pExpandedKey->RoundKey[0]; \ + keyLimit = (const __n128 *)pExpandedKey->lastEncRoundKey; \ +\ + UNROLL_AES_ROUNDS( \ + AES_ENCRYPT_ROUND_1, \ + AES_ENCRYPT_FINAL_1, \ + c0, c1, c2, c3, c4, c5, c6, c7 \ + ) \ +}; + +// Perform AES encryption without the last round key and with a specified first round key +// +// For algorithms where performance is dominated by a chain of dependent AES rounds (i.e. CBC encryption, CCM, CMAC) +// we can gain a reasonable performance uplift by computing (last round key ^ this plaintext block ^ first round key) +// off the critical path and using this computed value in place of first round key in the first AESE instruction. +#define AES_ENCRYPT_CHAIN_FIRST_1( c0, mergedFirstRoundKey, c2, c3, c4, c5, c6, c7 ) \ +{ \ + AESE_AESMC( c0, mergedFirstRoundKey ) \ +}; +#define AES_ENCRYPT_CHAIN_FINAL_1( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + c0 = vaeseq_u8( c0, roundKey ); \ +}; + +#define AES_ENCRYPT_1_CHAIN( pExpandedKey, c0, mergedFirstRoundKey ) \ +{ \ + const __n128 *keyPtr; \ + const __n128 *keyLimit; \ + __n128 roundKey; \ +\ + keyPtr = (const __n128 *)&pExpandedKey->RoundKey[0]; \ + keyLimit = (const __n128 *)pExpandedKey->lastEncRoundKey; \ +\ + UNROLL_AES_ROUNDS_FIRST( \ + AES_ENCRYPT_CHAIN_FIRST_1, \ + AES_ENCRYPT_ROUND_1, \ + AES_ENCRYPT_CHAIN_FINAL_1, \ + c0, mergedFirstRoundKey, c2, c3, c4, c5, c6, c7 \ + ) \ +}; + +#define AES_ENCRYPT_ROUND_4( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + AESE_AESMC( c0, roundKey ) \ + AESE_AESMC( c1, roundKey ) \ + AESE_AESMC( c2, roundKey ) \ + AESE_AESMC( c3, roundKey ) \ +}; +#define AES_ENCRYPT_FINAL_4( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + c0 = vaeseq_u8( c0, roundKey ); \ + c1 = vaeseq_u8( c1, roundKey ); \ + c2 = vaeseq_u8( c2, roundKey ); \ + c3 = vaeseq_u8( c3, roundKey ); \ + roundKey = *keyPtr; \ + c0 = 
veorq_u8( c0, roundKey ); \ + c1 = veorq_u8( c1, roundKey ); \ + c2 = veorq_u8( c2, roundKey ); \ + c3 = veorq_u8( c3, roundKey ); \ +}; + +#define AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3 ) \ +{ \ + const __n128 *keyPtr; \ + const __n128 *keyLimit; \ + __n128 roundKey; \ +\ + keyPtr = (const __n128 *)&pExpandedKey->RoundKey[0]; \ + keyLimit = (const __n128 *)pExpandedKey->lastEncRoundKey; \ +\ + UNROLL_AES_ROUNDS( \ + AES_ENCRYPT_ROUND_4, \ + AES_ENCRYPT_FINAL_4, \ + c0, c1, c2, c3, c4, c5, c6, c7 \ + ) \ +}; + +#define AES_ENCRYPT_ROUND_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + AESE_AESMC( c0, roundKey ) \ + AESE_AESMC( c1, roundKey ) \ + AESE_AESMC( c2, roundKey ) \ + AESE_AESMC( c3, roundKey ) \ + AESE_AESMC( c4, roundKey ) \ + AESE_AESMC( c5, roundKey ) \ + AESE_AESMC( c6, roundKey ) \ + AESE_AESMC( c7, roundKey ) \ +}; +#define AES_ENCRYPT_FINAL_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + c0 = vaeseq_u8( c0, roundKey ); \ + c1 = vaeseq_u8( c1, roundKey ); \ + c2 = vaeseq_u8( c2, roundKey ); \ + c3 = vaeseq_u8( c3, roundKey ); \ + c4 = vaeseq_u8( c4, roundKey ); \ + c5 = vaeseq_u8( c5, roundKey ); \ + c6 = vaeseq_u8( c6, roundKey ); \ + c7 = vaeseq_u8( c7, roundKey ); \ + roundKey = *keyPtr; \ + c0 = veorq_u8( c0, roundKey ); \ + c1 = veorq_u8( c1, roundKey ); \ + c2 = veorq_u8( c2, roundKey ); \ + c3 = veorq_u8( c3, roundKey ); \ + c4 = veorq_u8( c4, roundKey ); \ + c5 = veorq_u8( c5, roundKey ); \ + c6 = veorq_u8( c6, roundKey ); \ + c7 = veorq_u8( c7, roundKey ); \ +}; + +#define AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + const __n128 *keyPtr; \ + const __n128 *keyLimit; \ + __n128 roundKey; \ +\ + keyPtr = (const __n128 *)&pExpandedKey->RoundKey[0]; \ + keyLimit = (const __n128 *)pExpandedKey->lastEncRoundKey; \ +\ + UNROLL_AES_ROUNDS( \ + AES_ENCRYPT_ROUND_8, \ + AES_ENCRYPT_FINAL_8, \ + c0, c1, c2, c3, c4, c5, c6, c7 \ + ) \ +}; + +#define AES_DECRYPT_ROUND_1( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + AESD_AESIMC( c0, 
roundKey ) \ +}; +#define AES_DECRYPT_FINAL_1( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + c0 = vaesdq_u8( c0, roundKey ); \ + roundKey = *keyPtr; \ + c0 = veorq_u8( c0, roundKey ); \ +}; + +#define AES_DECRYPT_1( pExpandedKey, c0 ) \ +{ \ + const __n128 *keyPtr; \ + const __n128 *keyLimit; \ + __n128 roundKey; \ +\ + keyPtr = (const __n128 *)pExpandedKey->lastEncRoundKey; \ + keyLimit = (const __n128 *)pExpandedKey->lastDecRoundKey; \ +\ + UNROLL_AES_ROUNDS( \ + AES_DECRYPT_ROUND_1, \ + AES_DECRYPT_FINAL_1, \ + c0, c1, c2, c3, c4, c5, c6, c7 \ + ) \ +}; + +#define AES_DECRYPT_ROUND_4( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + AESD_AESIMC( c0, roundKey ) \ + AESD_AESIMC( c1, roundKey ) \ + AESD_AESIMC( c2, roundKey ) \ + AESD_AESIMC( c3, roundKey ) \ +}; +#define AES_DECRYPT_FINAL_4( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + c0 = vaesdq_u8( c0, roundKey ); \ + c1 = vaesdq_u8( c1, roundKey ); \ + c2 = vaesdq_u8( c2, roundKey ); \ + c3 = vaesdq_u8( c3, roundKey ); \ + roundKey = *keyPtr; \ + c0 = veorq_u8( c0, roundKey ); \ + c1 = veorq_u8( c1, roundKey ); \ + c2 = veorq_u8( c2, roundKey ); \ + c3 = veorq_u8( c3, roundKey ); \ +}; + +#define AES_DECRYPT_4( pExpandedKey, c0, c1, c2, c3 ) \ +{ \ + const __n128 *keyPtr; \ + const __n128 *keyLimit; \ + __n128 roundKey; \ +\ + keyPtr = (const __n128 *)pExpandedKey->lastEncRoundKey; \ + keyLimit = (const __n128 *)pExpandedKey->lastDecRoundKey; \ +\ + UNROLL_AES_ROUNDS( \ + AES_DECRYPT_ROUND_4, \ + AES_DECRYPT_FINAL_4, \ + c0, c1, c2, c3, c4, c5, c6, c7 \ + ) \ +}; + +#define AES_DECRYPT_ROUND_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + AESD_AESIMC( c0, roundKey ) \ + AESD_AESIMC( c1, roundKey ) \ + AESD_AESIMC( c2, roundKey ) \ + AESD_AESIMC( c3, roundKey ) \ + AESD_AESIMC( c4, roundKey ) \ + AESD_AESIMC( c5, roundKey ) \ + AESD_AESIMC( c6, roundKey ) \ + AESD_AESIMC( c7, roundKey ) \ +}; +#define AES_DECRYPT_FINAL_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + c0 = vaesdq_u8( c0, roundKey ); \ + c1 = vaesdq_u8( c1, roundKey 
); \ + c2 = vaesdq_u8( c2, roundKey ); \ + c3 = vaesdq_u8( c3, roundKey ); \ + c4 = vaesdq_u8( c4, roundKey ); \ + c5 = vaesdq_u8( c5, roundKey ); \ + c6 = vaesdq_u8( c6, roundKey ); \ + c7 = vaesdq_u8( c7, roundKey ); \ + roundKey = *keyPtr; \ + c0 = veorq_u8( c0, roundKey ); \ + c1 = veorq_u8( c1, roundKey ); \ + c2 = veorq_u8( c2, roundKey ); \ + c3 = veorq_u8( c3, roundKey ); \ + c4 = veorq_u8( c4, roundKey ); \ + c5 = veorq_u8( c5, roundKey ); \ + c6 = veorq_u8( c6, roundKey ); \ + c7 = veorq_u8( c7, roundKey ); \ +}; + +#define AES_DECRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ) \ +{ \ + const __n128 *keyPtr; \ + const __n128 *keyLimit; \ + __n128 roundKey; \ +\ + keyPtr = (const __n128 *)pExpandedKey->lastEncRoundKey; \ + keyLimit = (const __n128 *)pExpandedKey->lastDecRoundKey; \ +\ + UNROLL_AES_ROUNDS( \ + AES_DECRYPT_ROUND_8, \ + AES_DECRYPT_FINAL_8, \ + c0, c1, c2, c3, c4, c5, c6, c7 \ + ) \ +}; + + + +VOID +SYMCRYPT_CALL +SymCryptAesEncryptNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ) +{ + __n128 c; + + c = *( __n128 * ) pbSrc; + + AES_ENCRYPT_1( pExpandedKey, c ); + + *(__n128 *) pbDst = c; +} + +VOID +SYMCRYPT_CALL +SymCryptAesDecryptNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ) +{ + __n128 c; + + c = *( __n128 * ) pbSrc; + + AES_DECRYPT_1( pExpandedKey, c ); + + *(__n128 *) pbDst = c; +} + + +VOID +SYMCRYPT_CALL +SymCryptAesCbcEncryptNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __n128 c = *(__n128 *)pbChainingValue; + __n128 rk0 = *(__n128 *) &pExpandedKey->RoundKey[0]; + __n128 rkLast = *(__n128 *) pExpandedKey->lastEncRoundKey; + 
__n128 d, rk0AndLast; + + // This algorithm is dominated by chain of dependent AES rounds, so we want to avoid EOR + // instructions on the critical path where possible + // We can compute (last round key ^ this plaintext block ^ first round key) off the critical + // path and use this with AES_ENCRYPT_1_CHAIN so that only AES instructions write to c in + // the main loop + rk0AndLast = veorq_u8( rk0, rkLast ); + + c = veorq_u8( c, rkLast ); + + while( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) + { + d = veorq_u8( *(__n128 *)pbSrc, rk0AndLast); + AES_ENCRYPT_1_CHAIN( pExpandedKey, c, d ); + *(__n128 *)pbDst = veorq_u8( c, rkLast ); + + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + } + *(__n128 *)pbChainingValue = veorq_u8( c, rkLast ); +} + +// Disable warnings and VC++ runtime checks for use of uninitialized values (by design) +#pragma warning(push) +#pragma warning( disable: 6001 4701 ) +#pragma runtime_checks( "u", off ) +VOID +SYMCRYPT_CALL +SymCryptAesCbcDecryptNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __n128 chain; + __n128 c0, c1, c2, c3, c4, c5, c6, c7; + __n128 d0, d1, d2, d3, d4, d5, d6, d7; + const __n128 * pSrc = (const __n128 *) pbSrc; + __n128 * pDst = (__n128 *) pbDst; + SIZE_T cData = cbData / SYMCRYPT_AES_BLOCK_SIZE; + + if( cData < 1 ) + { + return; + } + + chain = *(__n128 *) pbChainingValue; + + // + // First we do all multiples of 8 blocks + // + + while( cData >= 8 ) + { + d0 = c0 = pSrc[0]; + d1 = c1 = pSrc[1]; + d2 = c2 = pSrc[2]; + d3 = c3 = pSrc[3]; + d4 = c4 = pSrc[4]; + d5 = c5 = pSrc[5]; + d6 = c6 = pSrc[6]; + d7 = c7 = pSrc[7]; + + AES_DECRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + c0 = veorq_u8( c0, chain ); + c1 = veorq_u8( c1, d0 ); + c2 = veorq_u8( c2, d1 ); + c3 = veorq_u8( c3, d2 ); 
+ c4 = veorq_u8( c4, d3 ); + c5 = veorq_u8( c5, d4 ); + c6 = veorq_u8( c6, d5 ); + c7 = veorq_u8( c7, d6 ); + chain = d7; + + pDst[0] = c0; + pDst[1] = c1; + pDst[2] = c2; + pDst[3] = c3; + pDst[4] = c4; + pDst[5] = c5; + pDst[6] = c6; + pDst[7] = c7; + + pSrc += 8; + pDst += 8; + cData -= 8; + } + + if( cData >= 1 ) + { + // + // There is remaining work to be done + // + d0 = c0 = pSrc[0]; + if( cData >= 2 ) + { + d1 = c1 = pSrc[1]; + if( cData >= 3 ) + { + d2 = c2 = pSrc[2]; + if( cData >= 4 ) + { + d3 = c3 = pSrc[3]; + if( cData >= 5 ) + { + d4 = c4 = pSrc[4]; + if( cData >= 6 ) + { + d5 = c5 = pSrc[5]; + if( cData >= 7 ) + { + d6 = c6 = pSrc[6]; + } + } + } + } + } + } + + // + // Decrypt 1, 4, or 8 blocks in AES-CBC mode. This might decrypt uninitialized registers, + // but those will not be used when we store the results. + // + if( cData > 4 ) + { + AES_DECRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + c0 = veorq_u8( c0, chain ); + c1 = veorq_u8( c1, d0 ); + c2 = veorq_u8( c2, d1 ); + c3 = veorq_u8( c3, d2 ); + c4 = veorq_u8( c4, d3 ); + c5 = veorq_u8( c5, d4 ); + c6 = veorq_u8( c6, d5 ); + } + else if( cData > 1 ) + { + AES_DECRYPT_4( pExpandedKey, c0, c1, c2, c3 ); + c0 = veorq_u8( c0, chain ); + c1 = veorq_u8( c1, d0 ); + c2 = veorq_u8( c2, d1 ); + c3 = veorq_u8( c3, d2 ); + } else + { + AES_DECRYPT_1( pExpandedKey, c0 ); + c0 = veorq_u8( c0, chain ); + } + + chain = pSrc[ cData - 1]; + pDst[0] = c0; + if( cData >= 2 ) + { + pDst[1] = c1; + if( cData >= 3 ) + { + pDst[2] = c2; + if( cData >= 4 ) + { + pDst[3] = c3; + if( cData >= 5 ) + { + pDst[4] = c4; + if( cData >= 6 ) + { + pDst[5] = c5; + if( cData >= 7 ) + { + pDst[6] = c6; + } + } + } + } + } + } + } + + *(__n128 *)pbChainingValue = chain; + + return; +} +#pragma runtime_checks( "u", restore ) +#pragma warning( pop ) + + + +VOID +SYMCRYPT_CALL +SymCryptAesCbcMacNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + 
_In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + __n128 c = *(__n128 *)pbChainingValue; + __n128 rk0 = *(__n128 *) &pExpandedKey->RoundKey[0]; + __n128 rkLast = *(__n128 *) pExpandedKey->lastEncRoundKey; + __n128 d, rk0AndLast; + + // This algorithm is dominated by chain of dependent AES rounds, so we want to avoid EOR + // instructions on the critical path where possible + // We can compute (last round key ^ this plaintext block ^ first round key) off the critical + // path and use this with AES_ENCRYPT_1_CHAIN so that only AES instructions write to c in + // the main loop + rk0AndLast = veorq_u8( rk0, rkLast ); + + c = veorq_u8( c, rkLast ); + + while( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) + { + d = veorq_u8( *(__n128 *)pbData, rk0AndLast); + AES_ENCRYPT_1_CHAIN( pExpandedKey, c, d ); + + pbData += SYMCRYPT_AES_BLOCK_SIZE; + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + } + *(__n128 *)pbChainingValue = veorq_u8( c, rkLast ); +} + +// Disable warnings and VC++ runtime checks for use of uninitialized values (by design) +#pragma warning(push) +#pragma warning( disable: 6001 4701 ) +#pragma runtime_checks( "u", off ) +VOID +SYMCRYPT_CALL +SymCryptAesEcbEncryptNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __n128 c0, c1, c2, c3, c4, c5, c6, c7; + const __n128 * pSrc = (const __n128 *) pbSrc; + __n128 * pDst = (__n128 *) pbDst; + + while( cbData >= 8 * SYMCRYPT_AES_BLOCK_SIZE ) + { + c0 = pSrc[0]; + c1 = pSrc[1]; + c2 = pSrc[2]; + c3 = pSrc[3]; + c4 = pSrc[4]; + c5 = pSrc[5]; + c6 = pSrc[6]; + c7 = pSrc[7]; + + AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + pDst[0] = c0; + pDst[1] = c1; + pDst[2] = c2; + pDst[3] = c3; + pDst[4] = c4; + pDst[5] = c5; + pDst[6] = c6; + pDst[7] = c7; + + pSrc += 8; + pDst += 8; + cbData -= 8 * SYMCRYPT_AES_BLOCK_SIZE; + } + + if( cbData < 16 ) + { + return; + } + + c0 = pSrc[0]; + if( cbData >= 32 ) + { + c1 = 
pSrc[1]; + if( cbData >= 48 ) + { + c2 = pSrc[2]; + if( cbData >= 64 ) + { + c3 = pSrc[3]; + if( cbData >= 80 ) + { + c4 = pSrc[4]; + if( cbData >= 96 ) + { + c5 = pSrc[5]; + if( cbData >= 112 ) + { + c6 = pSrc[6]; + } + } + } + } + } + } + + if( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE ) + { + AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + } + else if( cbData >= 2 * SYMCRYPT_AES_BLOCK_SIZE ) + { + AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3 ); + } + else + { + AES_ENCRYPT_1( pExpandedKey, c0 ); + } + + pDst[0] = c0; + if( cbData >= 32 ) + { + pDst[1] = c1; + if( cbData >= 48 ) + { + pDst[2] = c2; + if( cbData >= 64 ) + { + pDst[3] = c3; + if( cbData >= 80 ) + { + pDst[4] = c4; + if( cbData >= 96 ) + { + pDst[5] = c5; + if( cbData >= 112 ) + { + pDst[6] = c6; + } + } + } + } + } + } +} +#pragma runtime_checks( "u", restore) +#pragma warning( pop ) + +#pragma warning(push) +#pragma warning( disable:4701 ) // "Use of uninitialized variable" +#pragma runtime_checks( "u", off ) + +#define SYMCRYPT_AesCtrMsbXxNeon SymCryptAesCtrMsb64Neon +#define VADDQ_UXX vaddq_u64 +#define VSUBQ_UXX vsubq_u64 + +#include "aes-pattern.c" + +#undef VSUBQ_UXX +#undef VADDQ_UXX +#undef SYMCRYPT_AesCtrMsbXxNeon + +#define SYMCRYPT_AesCtrMsbXxNeon SymCryptAesCtrMsb32Neon +#define VADDQ_UXX vaddq_u32 +#define VSUBQ_UXX vsubq_u32 + +#include "aes-pattern.c" + +#undef VSUBQ_UXX +#undef VADDQ_UXX +#undef SYMCRYPT_AesCtrMsbXxNeon + +#pragma runtime_checks( "u", restore ) +#pragma warning(pop) + + +// +// Multiply by alpha +// +// <</>> indicate shifts on 128-bit values +// <<<</>>>> indicate shifts on 32-bit values +// + +// Multiply by ALPHA +// t1 = Input <<<< 1 words shifted left by 1 +// t2 = Input >>>> 31 words shifted right by 31 +// t1 = t1 ^ (t2 << 32) t1 = S << 1 +// t2 = t2 >> 96 t2 = highest bit of S +// t2 = (t2 <<<< 7) + (t2 <<<<3) - (t2) multiply polynomially by 0x87 , we can use - because we only have one bit input +// res = t1 ^ t2 +// +#define XTS_MUL_ALPHA_old( 
_in, _res ) \ +{\ + __n128 _t1, _t2;\ +\ + _t1 = vshlq_n_u32( _in, 1 ); \ + _t2 = vshrq_n_u32( _in, 31); \ + _t1 = veorq_u32( _t1, vextq_u32( vZero, _t2, 3 )); \ + _t2 = vextq_u32( _t2, vZero, 3); \ + _t2 = vsubq_u32( vaddq_u32( vshlq_n_u32( _t2, 7 ), vshlq_n_u32( _t2, 3 ) ), _t2 ); \ + _res = veorq_u32( _t1, _t2 ); \ +} + +// +// Another approach, use signed shift right to duplicate the bits of the leftmost byte +// and an AND to mask the modulo reduction and the extraneous bits in the other bytes at the same time. +// vAlphaMask = (1, 1, ..., 1, 0x87 ) +// +#define XTS_MUL_ALPHA( _in, _res ) \ +{\ + __n128 _t1, _t2;\ +\ + _t1 = vshlq_n_u8( _in, 1 ); \ + _t2 = vshrq_n_s8( _in, 7 ); \ + _t2 = vextq_u8( _t2, _t2, 15 ); \ + _t2 = vandq_u8( _t2, vAlphaMask ); \ + _res = veorq_u8( _t2, _t1 ); \ +} + + +// Multiply by ALPHA^2 +// t1 = Input <<<< 2 +// t2 = Input >>>> 30 +// t1 = t1 ^ (t2 << 32) +// t2 = t2 >> 96 +// t2 = (t2 <<<< 7) ^ (t2 <<<< 2) ^ (t2 <<<< 1) ^ t2 +// res = t1 ^ t2 +#define XTS_MUL_ALPHA2( _in, _res ) \ +{\ + __n128 _t1, _t2;\ +\ + _t1 = vshlq_n_u32( _in, 2 ); \ + _t2 = vshrq_n_u32( _in, 30); \ + _t1 = veorq_u32( _t1, vextq_u32( vZero, _t2, 3 )); \ + _t2 = vextq_u32( _t2, vZero, 3 ); \ + _t2 = veorq_u32( veorq_u32( veorq_u32( _t2, vshlq_n_u32( _t2, 7 )), vshlq_n_u32( _t2, 2 ) ), vshlq_n_u32( _t2, 1 ) ); \ + _res = veorq_u32( _t1, _t2 ); \ +} + +// Multiply by ALPHA^4 +// t1 = Input <<<< 4 +// t2 = Input >>>> 28 +// t1 = t1 ^ (t2 << 32) +// t2 = t2 >> 96 +// t2 = (t2 <<<< 7) ^ (t2 <<<< 2) ^ (t2 <<<< 1) ^ t2 +// res = t1 ^ t2 +#define XTS_MUL_ALPHA4( _in, _res ) \ +{\ + __n128 _t1, _t2;\ +\ + _t1 = vshlq_n_u32( _in, 4 ); \ + _t2 = vshrq_n_u32( _in, 28); \ + _t1 = veorq_u32( _t1, vextq_u32( vZero, _t2, 3 )); \ + _t2 = vextq_u32( _t2, vZero, 3 ); \ + _t2 = veorq_u32( veorq_u32( veorq_u32( _t2, vshlq_n_u32( _t2, 7 )), vshlq_n_u32( _t2, 2 ) ), vshlq_n_u32( _t2, 1 ) ); \ + _res = veorq_u32( _t1, _t2 ); \ +} + +#define XTS_MUL_ALPHA5( _in, _res ) \ +{\ + 
__n128 _t1, _t2;\ +\ + _t1 = vshlq_n_u32( _in, 5 ); \ + _t2 = vshrq_n_u32( _in, 27); \ + _t1 = veorq_u32( _t1, vextq_u32( vZero, _t2, 3 )); \ + _t2 = vextq_u32( _t2, vZero, 3 ); \ + _t2 = veorq_u32( veorq_u32( veorq_u32( _t2, vshlq_n_u32( _t2, 7 )), vshlq_n_u32( _t2, 2 ) ), vshlq_n_u32( _t2, 1 ) ); \ + _res = veorq_u32( _t1, _t2 ); \ +} + +// Multiply by ALPHA^8 +// res = (Input << 8) | (Input >> 120) +// t2 = (Input >> 120) * 0x86 +// i.e. ((Input >> 120) <<<< 7) ^ ((Input >> 120) <<<< 2) ^ ((Input >> 120) <<<< 1) +// the 0x01 component is already in res where we want it +// res = res ^ t2 +// +// vAlphaMultiplier = (0, 0, ..., 0, 0x86 ) + +#define XTS_MUL_ALPHA8( _in, _res ) \ +{\ + __n128 _t2;\ +\ + _res = vextq_u8( _in, _in, 15 ); \ + _t2 = vmull_p8( vget_low_p8(_res), vAlphaMultiplier ); \ + _res = veorq_u32( _res, _t2 ); \ +} + + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptDataUnitNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_(SYMCRYPT_AES_BLOCK_SIZE)PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __n128 t0, t1, t2, t3, t4, t5, t6, t7; + __n128 c0, c1, c2, c3, c4, c5, c6, c7; + const __n128 vZero = vmovq_n_u8(0); + const __n128 vAlphaMask = SYMCRYPT_SET_N128_U8(0x87, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + const __n64 vAlphaMultiplier = SYMCRYPT_SET_N64_U64(0x0000000000000086); + + SIZE_T cbDataMain; // number of bytes to handle in the main loop + SIZE_T cbDataTail; // number of bytes to handle in the tail loop + BYTE tailBuf[2*SYMCRYPT_AES_BLOCK_SIZE]; + + SYMCRYPT_ASSERT(cbData >= SYMCRYPT_AES_BLOCK_SIZE); + + // To simplify logic and unusual size processing, we handle all + // data not a multiple of 8 blocks in the tail loop + cbDataTail = cbData & ((8*SYMCRYPT_AES_BLOCK_SIZE)-1); + // Additionally, so that ciphertext stealing logic does not rely on + // reading back from the destination buffer, when we have a non-zero + // tail, we ensure that we handle 
at least 1 whole block in the tail + // + // Note that our caller has ensured we have at least 1 whole block + // to process, this is checked in debug build + // This means that cbDataTail is in [1,15] at this point iff there are + // at least 8 whole blocks to process; so the below does not cause + // cbDataTail or cbDataMain to exceed cbData + cbDataTail += ((cbDataTail > 0) && (cbDataTail < SYMCRYPT_AES_BLOCK_SIZE)) ? (8*SYMCRYPT_AES_BLOCK_SIZE) : 0; + cbDataMain = cbData - cbDataTail; + + SYMCRYPT_ASSERT(cbDataMain <= cbData); + SYMCRYPT_ASSERT(cbDataTail <= cbData); + SYMCRYPT_ASSERT((cbDataMain & ((8*SYMCRYPT_AES_BLOCK_SIZE)-1)) == 0); + + t0 = *(__n128 *)pbTweakBlock; + + if( cbDataMain > 0 ) + { + // Set up for main loop entry + // NOTE: We load the first 8 blocks and store the last 8 blocks out of the loop to allow + // greater instruction interleaving in the main loop. + // This appears to give about 5-8% performance uplift on little (in-order) cores and has + // no effect on big cores. + XTS_MUL_ALPHA4( t0, t4 ); + XTS_MUL_ALPHA ( t0, t1 ); + XTS_MUL_ALPHA ( t4, t5 ); + XTS_MUL_ALPHA ( t1, t2 ); + XTS_MUL_ALPHA ( t5, t6 ); + XTS_MUL_ALPHA ( t2, t3 ); + XTS_MUL_ALPHA ( t6, t7 ); + + c0 = veorq_u32( vld1q_u8( pbSrc + (0*16) ), t0 ); + c1 = veorq_u32( vld1q_u8( pbSrc + (1*16) ), t1 ); + c2 = veorq_u32( vld1q_u8( pbSrc + (2*16) ), t2 ); + c3 = veorq_u32( vld1q_u8( pbSrc + (3*16) ), t3 ); + c4 = veorq_u32( vld1q_u8( pbSrc + (4*16) ), t4 ); + c5 = veorq_u32( vld1q_u8( pbSrc + (5*16) ), t5 ); + c6 = veorq_u32( vld1q_u8( pbSrc + (6*16) ), t6 ); + c7 = veorq_u32( vld1q_u8( pbSrc + (7*16) ), t7 ); + + for(;;) + { + pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE; + + AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + cbDataMain -= 8 * SYMCRYPT_AES_BLOCK_SIZE; + if( cbDataMain < 8 * SYMCRYPT_AES_BLOCK_SIZE ) + { + break; + } + + // Interleave the final xor, write, and compute next tweak block, and load, and first xor. 
+ // This reduces register pressure and is more efficient. + vst1q_u8( pbDst + (0*16), veorq_u32( c0, t0 ) ); + vst1q_u8( pbDst + (1*16), veorq_u32( c1, t1 ) ); + vst1q_u8( pbDst + (2*16), veorq_u32( c2, t2 ) ); + vst1q_u8( pbDst + (3*16), veorq_u32( c3, t3 ) ); + vst1q_u8( pbDst + (4*16), veorq_u32( c4, t4 ) ); + vst1q_u8( pbDst + (5*16), veorq_u32( c5, t5 ) ); + vst1q_u8( pbDst + (6*16), veorq_u32( c6, t6 ) ); + vst1q_u8( pbDst + (7*16), veorq_u32( c7, t7 ) ); + + XTS_MUL_ALPHA8( t0, t0 ); + XTS_MUL_ALPHA8( t1, t1 ); + XTS_MUL_ALPHA8( t2, t2 ); + XTS_MUL_ALPHA8( t3, t3 ); + XTS_MUL_ALPHA8( t4, t4 ); + XTS_MUL_ALPHA8( t5, t5 ); + XTS_MUL_ALPHA8( t6, t6 ); + XTS_MUL_ALPHA8( t7, t7 ); + + c0 = veorq_u32( vld1q_u8( pbSrc + (0*16) ), t0 ); + c1 = veorq_u32( vld1q_u8( pbSrc + (1*16) ), t1 ); + c2 = veorq_u32( vld1q_u8( pbSrc + (2*16) ), t2 ); + c3 = veorq_u32( vld1q_u8( pbSrc + (3*16) ), t3 ); + c4 = veorq_u32( vld1q_u8( pbSrc + (4*16) ), t4 ); + c5 = veorq_u32( vld1q_u8( pbSrc + (5*16) ), t5 ); + c6 = veorq_u32( vld1q_u8( pbSrc + (6*16) ), t6 ); + c7 = veorq_u32( vld1q_u8( pbSrc + (7*16) ), t7 ); + + pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE; + } + + vst1q_u8( pbDst + (0*16), veorq_u32( c0, t0 ) ); + vst1q_u8( pbDst + (1*16), veorq_u32( c1, t1 ) ); + vst1q_u8( pbDst + (2*16), veorq_u32( c2, t2 ) ); + vst1q_u8( pbDst + (3*16), veorq_u32( c3, t3 ) ); + vst1q_u8( pbDst + (4*16), veorq_u32( c4, t4 ) ); + vst1q_u8( pbDst + (5*16), veorq_u32( c5, t5 ) ); + vst1q_u8( pbDst + (6*16), veorq_u32( c6, t6 ) ); + vst1q_u8( pbDst + (7*16), veorq_u32( c7, t7 ) ); + + // We won't do another 8-block set + // Update only the first tweak block in case it is needed for tail + XTS_MUL_ALPHA8( t0, t0 ); + + pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE; + } + + if( cbDataTail == 0 ) + { + return; // <-- expected case; early return here + } + + // Rare case, with data unit length not being multiple of 128 bytes, handle the tail one block at a time + while( cbDataTail >= 2*SYMCRYPT_AES_BLOCK_SIZE ) + { + 
c0 = veorq_u32( vld1q_u8(pbSrc), t0 ); + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + AES_ENCRYPT_1( pExpandedKey, c0 ); + vst1q_u8( pbDst, veorq_u32( c0, t0 ) ); + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + XTS_MUL_ALPHA( t0, t0 ); + cbDataTail -= SYMCRYPT_AES_BLOCK_SIZE; + } + + if( cbDataTail > SYMCRYPT_AES_BLOCK_SIZE ) + { + // Ciphertext stealing encryption + // + // +--------------+ + // | | + // | V + // +-----------------+ | +-----+-----------+ + // | P_m-1 | | | P_m |++++CP+++++| + // +-----------------+ | +-----+-----------+ + // | | | + // enc_m-1 | enc_m + // | | | + // V | V + // +-----+-----------+ | +-----------------+ + // | C_m |++++CP+++++|--+ | C_m-1 | + // +-----+-----------+ +-----------------+ + // | / + // +---------------- / --+ + // / | + // | V + // +-----------------+ | +-----+ + // | C_m-1 |<-+ | C_m | + // +-----------------+ +-----+ + + // Encrypt penultimate plaintext block into tailBuf + c0 = veorq_u32( vld1q_u8(pbSrc), t0 ); + AES_ENCRYPT_1( pExpandedKey, c0 ); + c0 = veorq_u32( c0, t0 ); + vst1q_u8( &tailBuf[0], c0 ); + vst1q_u8( &tailBuf[SYMCRYPT_AES_BLOCK_SIZE], c0 ); + + cbDataTail -= SYMCRYPT_AES_BLOCK_SIZE; + + // Copy final plaintext bytes to prefix of tailBuf - we must read before writing to support in-place encryption + memcpy( &tailBuf[0], pbSrc + SYMCRYPT_AES_BLOCK_SIZE, cbDataTail ); + // Copy prefix of tailBuf[SYMCRYPT_AES_BLOCK_SIZE] to the right place in the destination buffer + memcpy( pbDst + SYMCRYPT_AES_BLOCK_SIZE, &tailBuf[SYMCRYPT_AES_BLOCK_SIZE], cbDataTail ); + + // Do final tweak update + XTS_MUL_ALPHA( t0, t0 ); + + // Load updated tailBuf into c0 + c0 = vld1q_u8( &tailBuf[0] ); + } else { + // Just load final plaintext block into c0 + c0 = vld1q_u8( pbSrc ); + } + + // Final full block encryption + c0 = veorq_u32( c0, t0 ); + AES_ENCRYPT_1( pExpandedKey, c0 ); + vst1q_u8( pbDst, veorq_u32( c0, t0 ) ); +} + + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + 
_Inout_updates_(SYMCRYPT_AES_BLOCK_SIZE)PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __n128 t0, t1, t2, t3, t4, t5, t6, t7; + __n128 c0, c1, c2, c3, c4, c5, c6, c7; + const __n128 vZero = vmovq_n_u8(0); + const __n128 vAlphaMask = SYMCRYPT_SET_N128_U8(0x87, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + const __n64 vAlphaMultiplier = SYMCRYPT_SET_N64_U64(0x0000000000000086); + + SIZE_T cbDataMain; // number of bytes to handle in the main loop + SIZE_T cbDataTail; // number of bytes to handle in the tail loop + BYTE tailBuf[2*SYMCRYPT_AES_BLOCK_SIZE]; + + SYMCRYPT_ASSERT(cbData >= SYMCRYPT_AES_BLOCK_SIZE); + + // To simplify logic and unusual size processing, we handle all + // data not a multiple of 8 blocks in the tail loop + cbDataTail = cbData & ((8*SYMCRYPT_AES_BLOCK_SIZE)-1); + // Additionally, so that ciphertext stealing logic does not rely on + // reading back from the destination buffer, when we have a non-zero + // tail, we ensure that we handle at least 1 whole block in the tail + // + // Note that our caller has ensured we have at least 1 whole block + // to process, this is checked in debug build + // This means that cbDataTail is in [1,15] at this point iff there are + // at least 8 whole blocks to process; so the below does not cause + // cbDataTail or cbDataMain to exceed cbData + cbDataTail += ((cbDataTail > 0) && (cbDataTail < SYMCRYPT_AES_BLOCK_SIZE)) ? (8*SYMCRYPT_AES_BLOCK_SIZE) : 0; + cbDataMain = cbData - cbDataTail; + + SYMCRYPT_ASSERT(cbDataMain <= cbData); + SYMCRYPT_ASSERT(cbDataTail <= cbData); + SYMCRYPT_ASSERT((cbDataMain & ((8*SYMCRYPT_AES_BLOCK_SIZE)-1)) == 0); + + t0 = *(__n128 *)pbTweakBlock; + t7 = t0; + + if( cbDataMain > 0 ) + { + // Set up for main loop entry + // NOTE: We load the first 8 blocks and store the last 8 blocks out of the loop to allow + // greater instruction interleaving in the main loop. 
+ // This appears to give about 5-8% performance uplift on little (in-order) cores and has + // no effect on big cores. + XTS_MUL_ALPHA4( t0, t4 ); + XTS_MUL_ALPHA ( t0, t1 ); + XTS_MUL_ALPHA ( t4, t5 ); + XTS_MUL_ALPHA ( t1, t2 ); + XTS_MUL_ALPHA ( t5, t6 ); + XTS_MUL_ALPHA ( t2, t3 ); + XTS_MUL_ALPHA ( t6, t7 ); + + c0 = veorq_u32( vld1q_u8( pbSrc + (0*16) ), t0 ); + c1 = veorq_u32( vld1q_u8( pbSrc + (1*16) ), t1 ); + c2 = veorq_u32( vld1q_u8( pbSrc + (2*16) ), t2 ); + c3 = veorq_u32( vld1q_u8( pbSrc + (3*16) ), t3 ); + c4 = veorq_u32( vld1q_u8( pbSrc + (4*16) ), t4 ); + c5 = veorq_u32( vld1q_u8( pbSrc + (5*16) ), t5 ); + c6 = veorq_u32( vld1q_u8( pbSrc + (6*16) ), t6 ); + c7 = veorq_u32( vld1q_u8( pbSrc + (7*16) ), t7 ); + + for(;;) + { + pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE; + + AES_DECRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + cbDataMain -= 8 * SYMCRYPT_AES_BLOCK_SIZE; + if( cbDataMain < 8 * SYMCRYPT_AES_BLOCK_SIZE ) + { + break; + } + + // Interleave the final xor, write, and compute next tweak block, and load, and first xor. + // This reduces register pressure and is more efficient. 
+ vst1q_u8( pbDst + (0*16), veorq_u32( c0, t0 ) ); + vst1q_u8( pbDst + (1*16), veorq_u32( c1, t1 ) ); + vst1q_u8( pbDst + (2*16), veorq_u32( c2, t2 ) ); + vst1q_u8( pbDst + (3*16), veorq_u32( c3, t3 ) ); + vst1q_u8( pbDst + (4*16), veorq_u32( c4, t4 ) ); + vst1q_u8( pbDst + (5*16), veorq_u32( c5, t5 ) ); + vst1q_u8( pbDst + (6*16), veorq_u32( c6, t6 ) ); + vst1q_u8( pbDst + (7*16), veorq_u32( c7, t7 ) ); + + XTS_MUL_ALPHA8( t0, t0 ); + XTS_MUL_ALPHA8( t1, t1 ); + XTS_MUL_ALPHA8( t2, t2 ); + XTS_MUL_ALPHA8( t3, t3 ); + XTS_MUL_ALPHA8( t4, t4 ); + XTS_MUL_ALPHA8( t5, t5 ); + XTS_MUL_ALPHA8( t6, t6 ); + XTS_MUL_ALPHA8( t7, t7 ); + + c0 = veorq_u32( vld1q_u8( pbSrc + (0*16) ), t0 ); + c1 = veorq_u32( vld1q_u8( pbSrc + (1*16) ), t1 ); + c2 = veorq_u32( vld1q_u8( pbSrc + (2*16) ), t2 ); + c3 = veorq_u32( vld1q_u8( pbSrc + (3*16) ), t3 ); + c4 = veorq_u32( vld1q_u8( pbSrc + (4*16) ), t4 ); + c5 = veorq_u32( vld1q_u8( pbSrc + (5*16) ), t5 ); + c6 = veorq_u32( vld1q_u8( pbSrc + (6*16) ), t6 ); + c7 = veorq_u32( vld1q_u8( pbSrc + (7*16) ), t7 ); + + pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE; + } + + vst1q_u8( pbDst + (0*16), veorq_u32( c0, t0 ) ); + vst1q_u8( pbDst + (1*16), veorq_u32( c1, t1 ) ); + vst1q_u8( pbDst + (2*16), veorq_u32( c2, t2 ) ); + vst1q_u8( pbDst + (3*16), veorq_u32( c3, t3 ) ); + vst1q_u8( pbDst + (4*16), veorq_u32( c4, t4 ) ); + vst1q_u8( pbDst + (5*16), veorq_u32( c5, t5 ) ); + vst1q_u8( pbDst + (6*16), veorq_u32( c6, t6 ) ); + vst1q_u8( pbDst + (7*16), veorq_u32( c7, t7 ) ); + + // We won't do another 8-block set + // Update only the first tweak block in case it is needed for tail + XTS_MUL_ALPHA8( t0, t0 ); + + pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE; + } + + if( cbDataTail == 0 ) + { + return; // <-- expected case; early return here + } + + // Rare case, with data unit length not being multiple of 128 bytes, handle the tail one block at a time + while( cbDataTail >= 2*SYMCRYPT_AES_BLOCK_SIZE ) + { + c0 = veorq_u32( vld1q_u8( pbSrc ), t0 ); + pbSrc += 
SYMCRYPT_AES_BLOCK_SIZE; + AES_DECRYPT_1( pExpandedKey, c0 ); + vst1q_u8( pbDst, veorq_u32( c0, t0 ) ); + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + XTS_MUL_ALPHA( t0, t0 ); + cbDataTail -= SYMCRYPT_AES_BLOCK_SIZE; + } + + if( cbDataTail > SYMCRYPT_AES_BLOCK_SIZE ) + { + // Ciphertext stealing decryption + // + // +--------------+ + // | | + // | V + // +-----------------+ | +-----+-----------+ + // | C_m-1 | | | C_m |++++CP+++++| + // +-----------------+ | +-----+-----------+ + // | | | + // dec_m | dec_m-1 + // | | | + // V | V + // +-----+-----------+ | +-----------------+ + // | P_m |++++CP+++++|--+ | P_m-1 | + // +-----+-----------+ +-----------------+ + // | / + // +---------------- / --+ + // / | + // | V + // +-----------------+ | +-----+ + // | P_m-1 |<-+ | P_m | + // +-----------------+ +-----+ + + // Do final tweak update into t1 + // Penultimate tweak is in t0, ready for final decryption + XTS_MUL_ALPHA( t0, t1 ); + + // Decrypt penultimate ciphertext block into tailBuf + c0 = veorq_u32( vld1q_u8( pbSrc ), t1 ); + AES_DECRYPT_1( pExpandedKey, c0 ); + c0 = veorq_u32( c0, t1 ); + vst1q_u8( &tailBuf[0], c0 ); + vst1q_u8( &tailBuf[SYMCRYPT_AES_BLOCK_SIZE], c0 ); + + cbDataTail -= SYMCRYPT_AES_BLOCK_SIZE; + + // Copy final ciphertext bytes to prefix of tailBuf - we must read before writing to support in-place decryption + memcpy( &tailBuf[0], pbSrc + SYMCRYPT_AES_BLOCK_SIZE, cbDataTail ); + // Copy prefix of tailBuf[SYMCRYPT_AES_BLOCK_SIZE] to the right place in the destination buffer + memcpy( pbDst + SYMCRYPT_AES_BLOCK_SIZE, &tailBuf[SYMCRYPT_AES_BLOCK_SIZE], cbDataTail ); + + // Load updated tailBuf into c0 + c0 = vld1q_u8( &tailBuf[0] ); + } else { + // Just load final ciphertext block into c0 + c0 = vld1q_u8( pbSrc ); + } + + // Final full block decryption + c0 = veorq_u32( c0, t0 ); + AES_DECRYPT_1( pExpandedKey, c0 ); + vst1q_u8( pbDst, veorq_u32( c0, t0 ) ); +} + +#include "ghash_definitions.h" + +#define AES_ENCRYPT_ROUND_4_GHASH_1( c0, c1, c2, c3, r0, 
r0x, t0, t1, gHashPointer, gHashExpandedKeyTable, todo, resl, resm, resh ) \ +{ \ + AESE_AESMC( c0, roundKey ) \ + AESE_AESMC( c1, roundKey ) \ + AESE_AESMC( c2, roundKey ) \ + AESE_AESMC( c3, roundKey ) \ +\ + r0x = *gHashPointer; \ + r0x = vrev64q_u8( r0x ); \ + r0 = vextq_u8( r0x, r0x, 8 ); \ + r0x = veorq_u8( r0, r0x ); \ + gHashPointer++; \ +\ + t1 = GHASH_H_POWER(gHashExpandedKeyTable, todo); \ + t0 = vmullq_p64( r0, t1 ); \ + t1 = vmull_high_p64( r0, t1 ); \ +\ + resl = veorq_u8( resl, t0 ); \ + resh = veorq_u8( resh, t1 ); \ +\ + t1 = GHASH_Hx_POWER(gHashExpandedKeyTable, todo); \ + t1 = vmullq_p64( r0x, t1 ); \ +\ + resm = veorq_u8( resm, t1 ); \ + todo--; \ +}; + +// +// Using a loop with AESE_AESMC and AESD_AESIMC, the compiler can still prematurely rearrange the loop and +// lose opportunity for scheduling adjacent pairs. +// Instead, explicitly unroll the AES rounds with this macro. +// +#define AES_GCM_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3, gHashPointer, gHashRounds, gHashExpandedKeyTable, todo, resl, resm, resh ) \ +{ \ + const __n128 *keyPtr; \ + const __n128 *keyLimit; \ + __n128 roundKey; \ +\ + keyPtr = (const __n128 *)&pExpandedKey->RoundKey[0]; \ + keyLimit = (const __n128 *)pExpandedKey->lastEncRoundKey; \ + __n128 t0, t1, r0, r0x; \ + SIZE_T aesEncryptGhashLoop; \ +\ + /* Do gHashRounds full rounds (AES-128|AES-192|AES-256) with stitched GHASH */ \ + roundKey = *keyPtr++; \ + for( aesEncryptGhashLoop = 0; aesEncryptGhashLoop < gHashRounds; aesEncryptGhashLoop++) \ + { \ + AES_ENCRYPT_ROUND_4_GHASH_1( c0, c1, c2, c3, r0, r0x, t0, t1, gHashPointer, gHashExpandedKeyTable, todo, resl, resm, resh ) \ + roundKey = *keyPtr++; \ + } \ +\ + /* Do 9-gHashRounds full rounds (AES-128|AES-192|AES-256) */ \ + for( aesEncryptGhashLoop = 0; aesEncryptGhashLoop < (9-gHashRounds); aesEncryptGhashLoop++) \ + { \ + AES_ENCRYPT_ROUND_4( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + } \ +\ + if ( keyPtr < keyLimit ) \ + { \ + /* Do 2 more full 
rounds (AES-192|AES-256) */ \ + AES_ENCRYPT_ROUND_4( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + AES_ENCRYPT_ROUND_4( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ +\ + if ( keyPtr < keyLimit ) \ + { \ + /* Do 2 more full rounds (AES-256) */ \ + AES_ENCRYPT_ROUND_4( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + AES_ENCRYPT_ROUND_4( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + } \ + } \ +\ + /* Do final round (AES-128|AES-192|AES-256) */ \ + AES_ENCRYPT_FINAL_4( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +}; + +#define AES_ENCRYPT_ROUND_8_GHASH_1( c0, c1, c2, c3, c4, c5, c6, c7, r0, r0x, t0, t1, gHashPointer, gHashExpandedKeyTable, todo, resl, resm, resh ) \ +{ \ + AESE_AESMC( c0, roundKey ) \ + AESE_AESMC( c1, roundKey ) \ + AESE_AESMC( c2, roundKey ) \ + AESE_AESMC( c3, roundKey ) \ + AESE_AESMC( c4, roundKey ) \ + AESE_AESMC( c5, roundKey ) \ + AESE_AESMC( c6, roundKey ) \ + AESE_AESMC( c7, roundKey ) \ +\ + r0x = *gHashPointer; \ + r0x = vrev64q_u8( r0x ); \ + r0 = vextq_u8( r0x, r0x, 8 ); \ + r0x = veorq_u8( r0, r0x ); \ + gHashPointer++; \ +\ + t1 = GHASH_H_POWER(gHashExpandedKeyTable, todo); \ + t0 = vmullq_p64( r0, t1 ); \ + t1 = vmull_high_p64( r0, t1 ); \ +\ + resl = veorq_u8( resl, t0 ); \ + resh = veorq_u8( resh, t1 ); \ +\ + t1 = GHASH_Hx_POWER(gHashExpandedKeyTable, todo); \ + t1 = vmullq_p64( r0x, t1 ); \ +\ + resm = veorq_u8( resm, t1 ); \ + todo--; \ +}; + +// +// Using a loop with AESE_AESMC and AESD_AESIMC, the compiler can still prematurely rearrange the loop and +// lose opportunity for scheduling adjacent pairs. +// Instead, explicitly unroll the AES rounds with this macro. 
+// +// AES_GCM_ENCRYPT_8 encrypts the 8 blocks c0-c7 in place with pExpandedKey. +// Each of the first gHashRounds AES rounds is stitched with one GHASH block: a block is read from gHashPointer, +// multiplied by the H power selected by todo and accumulated into resl/resm/resh; gHashPointer is advanced +// and todo is decremented. The remaining common rounds are counted as 9-gHashRounds, so gHashRounds must not exceed 9. +// +#define AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, gHashPointer, gHashRounds, gHashExpandedKeyTable, todo, resl, resm, resh ) \ +{ \ + const __n128 *keyPtr; \ + const __n128 *keyLimit; \ + __n128 roundKey; \ +\ + keyPtr = (const __n128 *)&pExpandedKey->RoundKey[0]; \ + keyLimit = (const __n128 *)pExpandedKey->lastEncRoundKey; \ + __n128 t0, t1, r0, r0x; \ + SIZE_T aesEncryptGhashLoop; \ +\ + /* Do gHashRounds full rounds (AES-128|AES-192|AES-256) with stitched GHASH */ \ + roundKey = *keyPtr++; \ + for( aesEncryptGhashLoop = 0; aesEncryptGhashLoop < gHashRounds; aesEncryptGhashLoop++) \ + { \ + AES_ENCRYPT_ROUND_8_GHASH_1( c0, c1, c2, c3, c4, c5, c6, c7, r0, r0x, t0, t1, gHashPointer, gHashExpandedKeyTable, todo, resl, resm, resh ) \ + roundKey = *keyPtr++; \ + } \ +\ + /* Do 9-gHashRounds full rounds (AES-128|AES-192|AES-256) */ \ + for( aesEncryptGhashLoop = 0; aesEncryptGhashLoop < (9-gHashRounds); aesEncryptGhashLoop++) \ + { \ + AES_ENCRYPT_ROUND_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + } \ +\ + if ( keyPtr < keyLimit ) \ + { \ + /* Do 2 more full rounds (AES-192|AES-256) */ \ + AES_ENCRYPT_ROUND_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + AES_ENCRYPT_ROUND_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ +\ + if ( keyPtr < keyLimit ) \ + { \ + /* Do 2 more full rounds (AES-256) */ \ + AES_ENCRYPT_ROUND_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + AES_ENCRYPT_ROUND_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \ + roundKey = *keyPtr++; \ + } \ + } \ +\ + /* Do final round (AES-128|AES-192|AES-256) */ \ + AES_ENCRYPT_FINAL_8( c0, c1, c2, c3, c4, c5, c6, c7 ) \ +}; + +// This call is functionally identical to: +// SymCryptAesCtrMsb64Neon( pExpandedKey, +// pbChainingValue, +// pbSrc, +// pbDst, +// cbData ); +// SymCryptGHashAppendDataPmull( expandedKeyTable, +// pState, +// pbDstOrig, +// cbDataOrig ); +// where pbDstOrig and cbDataOrig stand for pbDst and cbData as passed in: the GHASH input is the ciphertext written to pbDst. +// cbData must be a multiple of the block size (asserted). On return pbChainingValue holds the updated counter block +// and *pState the updated GHASH state. +// NOTE(review): pbChainingValue is annotated _In_reads_ but is written at the end of the function - confirm against the prototype. +VOID +SYMCRYPT_CALL +SymCryptAesGcmEncryptStitchedNeon( +
_In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __n128 chain = *(__n128 *)pbChainingValue; + const __n128 * pSrc = (const __n128 *) pbSrc; + const __n128 * pGhashSrc = (const __n128 *) pbDst; + __n128 * pDst = (__n128 *) pbDst; + + const __n128 chainIncrement1 = SYMCRYPT_SET_N128_U64( 0, 1 ); + const __n128 chainIncrement2 = SYMCRYPT_SET_N128_U64( 0, 2 ); + const __n128 chainIncrement8 = SYMCRYPT_SET_N128_U64( 0, 8 ); + + __n128 ctr0, ctr1, ctr2, ctr3, ctr4, ctr5, ctr6, ctr7; + __n128 c0, c1, c2, c3, c4, c5, c6, c7; + __n128 r0, r1; + __n128 r0x, r1x; + + __n128 state; + __n128 a0, a1, a2; + const __n64 vMultiplicationConstant = SYMCRYPT_SET_N64_U64(0xc200000000000000); + SIZE_T nBlocks = cbData / SYMCRYPT_GF128_BLOCK_SIZE; + SIZE_T todo; + + SYMCRYPT_ASSERT( (cbData & SYMCRYPT_GCM_BLOCK_MOD_MASK) == 0 ); // cbData is multiple of block size + + // Our chain variable is in integer format, not the MSBfirst format loaded from memory.
+ ctr0 = vrev64q_u8( chain ); + ctr1 = vaddq_u32( ctr0, chainIncrement1 ); + ctr2 = vaddq_u32( ctr0, chainIncrement2 ); + ctr3 = vaddq_u32( ctr1, chainIncrement2 ); + ctr4 = vaddq_u32( ctr2, chainIncrement2 ); + ctr5 = vaddq_u32( ctr3, chainIncrement2 ); + ctr6 = vaddq_u32( ctr4, chainIncrement2 ); + ctr7 = vaddq_u32( ctr5, chainIncrement2 ); + + state = *(__n128 *) pState; + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PMULL_HPOWERS ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + + // Do 8 blocks of CTR either for tail (if total blocks <8) or for encryption of first 8 blocks + c0 = vrev64q_u8( ctr0 ); + c1 = vrev64q_u8( ctr1 ); + c2 = vrev64q_u8( ctr2 ); + c3 = vrev64q_u8( ctr3 ); + c4 = vrev64q_u8( ctr4 ); + c5 = vrev64q_u8( ctr5 ); + c6 = vrev64q_u8( ctr6 ); + c7 = vrev64q_u8( ctr7 ); + + AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + if ( cbData >= 8 * SYMCRYPT_AES_BLOCK_SIZE ) + { + ctr0 = vaddq_u32( ctr0, chainIncrement8 ); + ctr1 = vaddq_u32( ctr1, chainIncrement8 ); + ctr2 = vaddq_u32( ctr2, chainIncrement8 ); + ctr3 = vaddq_u32( ctr3, chainIncrement8 ); + ctr4 = vaddq_u32( ctr4, chainIncrement8 ); + ctr5 = vaddq_u32( ctr5, chainIncrement8 ); + ctr6 = vaddq_u32( ctr6, chainIncrement8 ); + ctr7 = vaddq_u32( ctr7, chainIncrement8 ); + + // Encrypt first 8 blocks + pDst[0] = veorq_u64( pSrc[0], c0 ); + pDst[1] = veorq_u64( pSrc[1], c1 ); + pDst[2] = veorq_u64( pSrc[2], c2 ); + pDst[3] = veorq_u64( pSrc[3], c3 ); + pDst[4] = veorq_u64( pSrc[4], c4 ); + pDst[5] = veorq_u64( pSrc[5], c5 ); + pDst[6] = veorq_u64( pSrc[6], c6 ); + pDst[7] = veorq_u64( pSrc[7], c7 ); + + pDst += 8; + pSrc += 8; + + while( nBlocks >= 16 ) + { + // In this loop we always have 8 blocks to encrypt and we have already encrypted the previous 8 blocks ready for GHASH + c0 = vrev64q_u8( ctr0 ); + c1 = vrev64q_u8( ctr1 ); + c2 = vrev64q_u8( ctr2 ); + c3 = vrev64q_u8( ctr3 ); + c4 = vrev64q_u8( ctr4 ); +
c5 = vrev64q_u8( ctr5 ); + c6 = vrev64q_u8( ctr6 ); + c7 = vrev64q_u8( ctr7 ); + + ctr0 = vaddq_u32( ctr0, chainIncrement8 ); + ctr1 = vaddq_u32( ctr1, chainIncrement8 ); + ctr2 = vaddq_u32( ctr2, chainIncrement8 ); + ctr3 = vaddq_u32( ctr3, chainIncrement8 ); + ctr4 = vaddq_u32( ctr4, chainIncrement8 ); + ctr5 = vaddq_u32( ctr5, chainIncrement8 ); + ctr6 = vaddq_u32( ctr6, chainIncrement8 ); + ctr7 = vaddq_u32( ctr7, chainIncrement8 ); + + AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pGhashSrc, 8, expandedKeyTable, todo, a0, a1, a2 ); + + pDst[0] = veorq_u64( pSrc[0], c0 ); + pDst[1] = veorq_u64( pSrc[1], c1 ); + pDst[2] = veorq_u64( pSrc[2], c2 ); + pDst[3] = veorq_u64( pSrc[3], c3 ); + pDst[4] = veorq_u64( pSrc[4], c4 ); + pDst[5] = veorq_u64( pSrc[5], c5 ); + pDst[6] = veorq_u64( pSrc[6], c6 ); + pDst[7] = veorq_u64( pSrc[7], c7 ); + + pDst += 8; + pSrc += 8; + nBlocks -= 8; + + if (todo == 0) + { + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PMULL_HPOWERS ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + } + } + + // We now have at least 8 blocks of encrypted data to GHASH and at most 7 blocks left to encrypt + // Do 8 blocks of GHASH in parallel with generating 0, 4, or 8 AES-CTR blocks for tail encryption + nBlocks -= 8; + if (nBlocks > 0) + { + c0 = vrev64q_u8( ctr0 ); + c1 = vrev64q_u8( ctr1 ); + c2 = vrev64q_u8( ctr2 ); + c3 = vrev64q_u8( ctr3 ); + + if (nBlocks > 4) + { + // Do 8 rounds of AES-CTR for tail in parallel with 8 rounds of GHASH + c4 = vrev64q_u8( ctr4 ); + c5 = vrev64q_u8( ctr5 ); + c6 = vrev64q_u8( ctr6 ); + + AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pGhashSrc, 8, expandedKeyTable, todo, a0, a1, a2 ); + } + else + { + // Do 4 rounds of AES-CTR for tail in parallel with 8 rounds of GHASH + AES_GCM_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3,
pGhashSrc, 8, expandedKeyTable, todo, a0, a1, a2 ); + } + + if( todo == 0) + { + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PMULL_HPOWERS ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + } + } + else + { + // Just do the final 8 rounds of GHASH + for( todo=8; todo>0; todo-- ) + { + r0x = vrev64q_u8( pGhashSrc[0] ); + r0 = vextq_u8( r0x, r0x, 8 ); + r0x = veorq_u8( r0, r0x ); + pGhashSrc++; + + CLMUL_ACCX_3( r0, r0x, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + } + + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + } + } + + if( nBlocks > 0 ) + { + // Encrypt 1-7 blocks with pre-generated AES-CTR blocks and GHASH the results + while( nBlocks >= 2 ) + { + ctr0 = vaddq_u32( ctr0, chainIncrement2 ); + + r0 = veorq_u64( pSrc[0], c0 ); + r1 = veorq_u64( pSrc[1], c1 ); + + pDst[0] = r0; + pDst[1] = r1; + + r0x = vrev64q_u8( r0 ); + r1x = vrev64q_u8( r1 ); + r0 = vextq_u8( r0x, r0x, 8 ); + r1 = vextq_u8( r1x, r1x, 8 ); + r0x = veorq_u8( r0, r0x ); + r1x = veorq_u8( r1, r1x ); + + CLMUL_ACCX_3( r0, r0x, GHASH_H_POWER(expandedKeyTable, todo - 0), GHASH_Hx_POWER(expandedKeyTable, todo - 0), a0, a1, a2 ); + CLMUL_ACCX_3( r1, r1x, GHASH_H_POWER(expandedKeyTable, todo - 1), GHASH_Hx_POWER(expandedKeyTable, todo - 1), a0, a1, a2 ); + + pDst += 2; + pSrc += 2; + todo -= 2; + nBlocks -= 2; + c0 = c2; + c1 = c3; + c2 = c4; + c3 = c5; + c4 = c6; + } + + if( nBlocks > 0 ) + { + ctr0 = vaddq_u32( ctr0, chainIncrement1 ); + + r0 = veorq_u64( pSrc[0], c0 ); + pDst[0] = r0; + r0x = vrev64q_u8( r0 ); + r0 = vextq_u8( r0x, r0x, 8 ); + r0x = veorq_u8( r0, r0x ); + + CLMUL_ACCX_3( r0, r0x, GHASH_H_POWER(expandedKeyTable, 1), GHASH_Hx_POWER(expandedKeyTable, 1), a0, a1, a2 ); + } + + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant,
a0, a1, a2, state ); + } + + chain = vrev64q_u8( ctr0 ); + *(__n128 *)pbChainingValue = chain; + *(__n128 *)pState = state; +} + +#pragma warning(push) +#pragma warning( disable:4701 ) // "Use of uninitialized variable" - +#pragma runtime_checks( "u", off ) +// This call is functionally identical to: +// SymCryptGHashAppendDataPmull(expandedKeyTable, +// pState, +// pbSrc, +// cbData ); +// SymCryptAesCtrMsb64Neon( pExpandedKey, +// pbChainingValue, +// pbSrc, +// pbDst, +// cbData ); +// cbData must be a multiple of the block size (asserted). On return pbChainingValue holds the updated counter block +// and *pState the updated GHASH state. +// NOTE(review): pbChainingValue is annotated _In_reads_ but is written at the end of the function - confirm against the prototype. +VOID +SYMCRYPT_CALL +SymCryptAesGcmDecryptStitchedNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __n128 chain = *(__n128 *)pbChainingValue; + const __n128 * pSrc = (const __n128 *) pbSrc; + const __n128 * pGhashSrc = (const __n128 *) pbSrc; + __n128 * pDst = (__n128 *) pbDst; + + const __n128 chainIncrement1 = SYMCRYPT_SET_N128_U64( 0, 1 ); + const __n128 chainIncrement2 = SYMCRYPT_SET_N128_U64( 0, 2 ); + const __n128 chainIncrement8 = SYMCRYPT_SET_N128_U64( 0, 8 ); + + __n128 ctr0, ctr1, ctr2, ctr3, ctr4, ctr5, ctr6, ctr7; + __n128 c0, c1, c2, c3, c4, c5, c6, c7; + + __n128 state; + __n128 a0, a1, a2; + const __n64 vMultiplicationConstant = SYMCRYPT_SET_N64_U64(0xc200000000000000); + SIZE_T nBlocks = cbData / SYMCRYPT_GF128_BLOCK_SIZE; + SIZE_T todo; + + SYMCRYPT_ASSERT( (cbData & SYMCRYPT_GCM_BLOCK_MOD_MASK) == 0 ); // cbData is multiple of block size + + // Our chain variable is in integer format, not the MSBfirst format loaded from memory.
+ ctr0 = vrev64q_u8( chain ); + ctr1 = vaddq_u32( ctr0, chainIncrement1 ); + ctr2 = vaddq_u32( ctr0, chainIncrement2 ); + ctr3 = vaddq_u32( ctr1, chainIncrement2 ); + ctr4 = vaddq_u32( ctr2, chainIncrement2 ); + ctr5 = vaddq_u32( ctr3, chainIncrement2 ); + ctr6 = vaddq_u32( ctr4, chainIncrement2 ); + ctr7 = vaddq_u32( ctr5, chainIncrement2 ); + + state = *(__n128 *) pState; + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PMULL_HPOWERS ); + + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + + while( nBlocks >= 8 ) + { + // In this loop we always have 8 blocks to decrypt and GHASH + c0 = vrev64q_u8( ctr0 ); + c1 = vrev64q_u8( ctr1 ); + c2 = vrev64q_u8( ctr2 ); + c3 = vrev64q_u8( ctr3 ); + c4 = vrev64q_u8( ctr4 ); + c5 = vrev64q_u8( ctr5 ); + c6 = vrev64q_u8( ctr6 ); + c7 = vrev64q_u8( ctr7 ); + + ctr0 = vaddq_u32( ctr0, chainIncrement8 ); + ctr1 = vaddq_u32( ctr1, chainIncrement8 ); + ctr2 = vaddq_u32( ctr2, chainIncrement8 ); + ctr3 = vaddq_u32( ctr3, chainIncrement8 ); + ctr4 = vaddq_u32( ctr4, chainIncrement8 ); + ctr5 = vaddq_u32( ctr5, chainIncrement8 ); + ctr6 = vaddq_u32( ctr6, chainIncrement8 ); + ctr7 = vaddq_u32( ctr7, chainIncrement8 ); + + AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pGhashSrc, 8, expandedKeyTable, todo, a0, a1, a2 ); + + pDst[0] = veorq_u64( pSrc[0], c0 ); + pDst[1] = veorq_u64( pSrc[1], c1 ); + pDst[2] = veorq_u64( pSrc[2], c2 ); + pDst[3] = veorq_u64( pSrc[3], c3 ); + pDst[4] = veorq_u64( pSrc[4], c4 ); + pDst[5] = veorq_u64( pSrc[5], c5 ); + pDst[6] = veorq_u64( pSrc[6], c6 ); + pDst[7] = veorq_u64( pSrc[7], c7 ); + + pDst += 8; + pSrc += 8; + nBlocks -= 8; + + if (todo == 0) + { + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + + if ( nBlocks > 0 ) + { + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PMULL_HPOWERS ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo),
a0, a1, a2 ); + } + } + } + + if( nBlocks > 0 ) + { + // We have 1-7 blocks to GHASH and decrypt + // Do the exact number of GHASH blocks we need in parallel with generating either 4 or 8 blocks of AES-CTR + c0 = vrev64q_u8( ctr0 ); + c1 = vrev64q_u8( ctr1 ); + c2 = vrev64q_u8( ctr2 ); + c3 = vrev64q_u8( ctr3 ); + + // Load c4-c7 unconditionally so that no key stream variable is read uninitialized below: + // AES_GCM_ENCRYPT_8 runs AES rounds on c7, and the tail loop copies c4-c6 even when only 4 blocks were generated. + // The extra values are never XORed into the output, so the result is unchanged. + c4 = vrev64q_u8( ctr4 ); + c5 = vrev64q_u8( ctr5 ); + c6 = vrev64q_u8( ctr6 ); + c7 = vrev64q_u8( ctr7 ); + + if( nBlocks > 4 ) + { + AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pGhashSrc, nBlocks, expandedKeyTable, todo, a0, a1, a2 ); + } else { + AES_GCM_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3, pGhashSrc, nBlocks, expandedKeyTable, todo, a0, a1, a2 ); + } + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + + // Decrypt 1-7 blocks with pre-generated AES-CTR blocks + while( nBlocks >= 2 ) + { + ctr0 = vaddq_u32( ctr0, chainIncrement2 ); + + pDst[0] = veorq_u64( pSrc[0], c0 ); + pDst[1] = veorq_u64( pSrc[1], c1 ); + + pDst += 2; + pSrc += 2; + nBlocks -= 2; + c0 = c2; + c1 = c3; + c2 = c4; + c3 = c5; + c4 = c6; + } + + if( nBlocks > 0 ) + { + ctr0 = vaddq_u32( ctr0, chainIncrement1 ); + + pDst[0] = veorq_u64( pSrc[0], c0 ); + } + } + + chain = vrev64q_u8( ctr0 ); + *(__n128 *)pbChainingValue = chain; + *(__n128 *)pState = state; +} +#pragma runtime_checks( "u", restore ) +#pragma warning(pop) +#pragma clang attribute pop + +#endif diff --git a/libs/symcrypt/lib/aes-pattern.c b/libs/symcrypt/lib/aes-pattern.c new file mode 100644 index 00000000000..7fa78108d86 --- /dev/null +++ b/libs/symcrypt/lib/aes-pattern.c @@ -0,0 +1,348 @@ +// +// aes-pattern.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// This file contains "pattern" code for AES-related functions. It's not intended to be compiled +// directly; rather it is included by other aes-*.c files which define the macros used here.
+// + +#if 0 +#pragma makedep header +#endif + +#if SYMCRYPT_CPU_ARM64 + +// +// AES-CTR pattern for Neon. The including file is expected to define SYMCRYPT_AesCtrMsbXxNeon (the function name) +// and VADDQ_UXX / VSUBQ_UXX (the vector add / subtract applied to the counter). +// cbData is rounded down to a whole number of AES blocks; that many blocks of pbSrc are XORed with key stream +// into pbDst, and pbChainingValue is rewritten with the counter that follows the last block processed. +// +VOID +SYMCRYPT_CALL +SYMCRYPT_AesCtrMsbXxNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __n128 chain = *(__n128 *)pbChainingValue; + const __n128 * pSrc = (const __n128 *) pbSrc; + __n128 * pDst = (__n128 *) pbDst; + + const __n128 chainIncrement1 = SYMCRYPT_SET_N128_U64( 0, 1 ); + const __n128 chainIncrement2 = SYMCRYPT_SET_N128_U64( 0, 2 ); + const __n128 chainIncrement8 = SYMCRYPT_SET_N128_U64( 0, 8 ); + + __n128 ctr0, ctr1, ctr2, ctr3, ctr4, ctr5, ctr6, ctr7; + __n128 c0, c1, c2, c3, c4, c5, c6, c7; + + cbData &= ~(SYMCRYPT_AES_BLOCK_SIZE - 1); + + // Our chain variable is in integer format, not the MSBfirst format loaded from memory. + ctr0 = vrev64q_u8( chain ); + ctr1 = VADDQ_UXX( ctr0, chainIncrement1 ); + ctr2 = VADDQ_UXX( ctr0, chainIncrement2 ); + ctr3 = VADDQ_UXX( ctr1, chainIncrement2 ); + ctr4 = VADDQ_UXX( ctr2, chainIncrement2 ); + ctr5 = VADDQ_UXX( ctr3, chainIncrement2 ); + ctr6 = VADDQ_UXX( ctr4, chainIncrement2 ); + ctr7 = VADDQ_UXX( ctr5, chainIncrement2 ); + +/* + while cbData >= 5 * block + generate 8 blocks of key stream + if cbData < 8 * block + break; + process 8 blocks + if cbData >= 5 * block + process 5-7 blocks + done + if cbData >= 2 * block + generate 4 blocks of key stream + process 2-4 blocks + done + if cbData == 1 block + generate 1 block of key stream + process block +*/ + while( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE ) + { + c0 = vrev64q_u8( ctr0 ); + c1 = vrev64q_u8( ctr1 ); + c2 = vrev64q_u8( ctr2 ); + c3 = vrev64q_u8( ctr3 ); + c4 = vrev64q_u8( ctr4 ); + c5 = vrev64q_u8( ctr5 ); + c6 = vrev64q_u8( ctr6 ); + c7 = vrev64q_u8( ctr7 ); + + ctr0 = VADDQ_UXX( ctr0, chainIncrement8 ); + ctr1 = VADDQ_UXX( ctr1, chainIncrement8 ); + ctr2 = VADDQ_UXX( ctr2, chainIncrement8 ); + ctr3 =
VADDQ_UXX( ctr3, chainIncrement8 ); + ctr4 = VADDQ_UXX( ctr4, chainIncrement8 ); + ctr5 = VADDQ_UXX( ctr5, chainIncrement8 ); + ctr6 = VADDQ_UXX( ctr6, chainIncrement8 ); + ctr7 = VADDQ_UXX( ctr7, chainIncrement8 ); + + AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + if( cbData < 8 * SYMCRYPT_AES_BLOCK_SIZE ) + { + break; + } + + pDst[0] = veorq_u64( pSrc[0], c0 ); + pDst[1] = veorq_u64( pSrc[1], c1 ); + pDst[2] = veorq_u64( pSrc[2], c2 ); + pDst[3] = veorq_u64( pSrc[3], c3 ); + pDst[4] = veorq_u64( pSrc[4], c4 ); + pDst[5] = veorq_u64( pSrc[5], c5 ); + pDst[6] = veorq_u64( pSrc[6], c6 ); + pDst[7] = veorq_u64( pSrc[7], c7 ); + + pDst += 8; + pSrc += 8; + cbData -= 8 * SYMCRYPT_AES_BLOCK_SIZE; + } + + // + // At this point we have one of the two following cases: + // - cbData >= 5 * 16 and we have 8 blocks of key stream in c0-c7. ctr0-ctr7 is set to (c0+8)-(c7+8) + // - cbData < 5 * 16 and we have no blocks of key stream, and ctr0-ctr7 set to the next 8 counters to use + // + + if( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) // quick exit of function if the request was a multiple of 8 blocks + { + if( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE ) + { + // + // We already have the key stream + // + pDst[0] = veorq_u64( pSrc[0], c0 ); + pDst[1] = veorq_u64( pSrc[1], c1 ); + pDst[2] = veorq_u64( pSrc[2], c2 ); + pDst[3] = veorq_u64( pSrc[3], c3 ); + pDst[4] = veorq_u64( pSrc[4], c4 ); + chain = VSUBQ_UXX( ctr5, chainIncrement8 ); + + if( cbData >= 96 ) + { + chain = VSUBQ_UXX( ctr6, chainIncrement8 ); + pDst[5] = veorq_u64( pSrc[5], c5 ); + if( cbData >= 112 ) + { + chain = VSUBQ_UXX( ctr7, chainIncrement8 ); + pDst[6] = veorq_u64( pSrc[6], c6 ); + } + } + } + else if( cbData >= 2 * SYMCRYPT_AES_BLOCK_SIZE ) + { + // Produce 4 blocks of key stream + + chain = ctr2; // chain is only incremented by 2 for now + + c0 = vrev64q_u8( ctr0 ); + c1 = vrev64q_u8( ctr1 ); + c2 = vrev64q_u8( ctr2 ); + c3 = vrev64q_u8( ctr3 ); + + AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3
); + + pDst[0] = veorq_u64( pSrc[0], c0 ); + pDst[1] = veorq_u64( pSrc[1], c1 ); + if( cbData >= 48 ) + { + chain = ctr3; + pDst[2] = veorq_u64( pSrc[2], c2 ); + if( cbData >= 64 ) + { + chain = ctr4; + pDst[3] = veorq_u64( pSrc[3], c3 ); + } + } + } + else + { + // Exactly 1 block to process + chain = ctr1; + + c0 = vrev64q_u8( ctr0 ); + + AES_ENCRYPT_1( pExpandedKey, c0 ); + pDst[0] = veorq_u64( pSrc[0], c0 ); + } + } + else + { + chain = ctr0; + } + + chain = vrev64q_u8( chain ); + *(__n128 *)pbChainingValue = chain; +} + +#endif // SYMCRYPT_CPU_ARM64 + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +// +// AES-CTR pattern for XMM. The including file is expected to define SYMCRYPT_AesCtrMsbXxXmm (the function name) +// and MM_ADD_EPIXX / MM_SUB_EPIXX (the vector add / subtract applied to the counter). +// cbData is rounded down to a whole number of AES blocks; that many blocks of pbSrc are XORed with key stream +// into pbDst using unaligned loads and stores, and pbChainingValue is rewritten with the counter that follows +// the last block processed. +// +VOID +SYMCRYPT_CALL +SYMCRYPT_AesCtrMsbXxXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __m128i chain = _mm_loadu_si128( (__m128i *) pbChainingValue ); + + __m128i BYTE_REVERSE_ORDER = _mm_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ); + + __m128i chainIncrement1 = _mm_set_epi32( 0, 0, 0, 1 ); + __m128i chainIncrement2 = _mm_set_epi32( 0, 0, 0, 2 ); + __m128i chainIncrement3 = _mm_set_epi32( 0, 0, 0, 3 ); + //__m128i chainIncrement8 = _mm_set_epi32( 0, 0, 0, 8 ); + + __m128i c0, c1, c2, c3, c4, c5, c6, c7; + + cbData &= ~(SYMCRYPT_AES_BLOCK_SIZE - 1); + + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER ); + +/* + while cbData >= 5 * block + generate 8 blocks of key stream + if cbData < 8 * block + break; + process 8 blocks + if cbData >= 5 * block + process 5-7 blocks + done + if cbData > 1 block + generate 4 blocks of key stream + process 2-4 blocks + done + if cbData == 1 block + generate 1 block of key stream + process block +*/ + while( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE ) + { + c0 = chain; + c1 = MM_ADD_EPIXX( chain, chainIncrement1 ); + c2 = MM_ADD_EPIXX( chain, chainIncrement2 ); + c3 = MM_ADD_EPIXX( c1, chainIncrement2 ); + c4 = MM_ADD_EPIXX( c2, chainIncrement2
); + c5 = MM_ADD_EPIXX( c3, chainIncrement2 ); + c6 = MM_ADD_EPIXX( c4, chainIncrement2 ); + c7 = MM_ADD_EPIXX( c5, chainIncrement2 ); + chain = MM_ADD_EPIXX( c6, chainIncrement2 ); + + c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER ); + c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER ); + c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER ); + c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER ); + c4 = _mm_shuffle_epi8( c4, BYTE_REVERSE_ORDER ); + c5 = _mm_shuffle_epi8( c5, BYTE_REVERSE_ORDER ); + c6 = _mm_shuffle_epi8( c6, BYTE_REVERSE_ORDER ); + c7 = _mm_shuffle_epi8( c7, BYTE_REVERSE_ORDER ); + + AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + if( cbData < 8 * SYMCRYPT_AES_BLOCK_SIZE ) + { + break; + } + + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst +112), _mm_xor_si128( c7, _mm_loadu_si128( ( __m128i * ) (pbSrc +112 ) ) ) ); + pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE; + pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE; + cbData -= 8 * SYMCRYPT_AES_BLOCK_SIZE; + } + + // + // At this point we have one of the two following cases: + // - cbData >= 5 * 16 and we have 8 blocks of key stream in c0-c7.
chain is set to c7 + 1 + // - cbData < 5 * 16 and we have no blocks of key stream, with chain the next value to use + // + + if( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) // quick exit of function if the request was a multiple of 8 blocks + { + if( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE ) + { + // + // We already have the key stream + // + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64 ) ) ) ); + chain = MM_SUB_EPIXX( chain, chainIncrement3 ); + + if( cbData >= 96 ) + { + chain = MM_ADD_EPIXX( chain, chainIncrement1 ); + _mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80 ) ) ) ); + if( cbData >= 112 ) + { + chain = MM_ADD_EPIXX( chain, chainIncrement1 ); + _mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96 ) ) ) ); + } + } + } + else if( cbData >= 2 * SYMCRYPT_AES_BLOCK_SIZE ) + { + // Produce 4 blocks of key stream + + c0 = chain; + c1 = MM_ADD_EPIXX( chain, chainIncrement1 ); + c2 = MM_ADD_EPIXX( chain, chainIncrement2 ); + c3 = MM_ADD_EPIXX( c1, chainIncrement2 ); + chain = c2; // chain is only incremented by 2 for now + + c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER ); + c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER ); + c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER ); + c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER ); + + AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3 ); + + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0,
_mm_loadu_si128( ( __m128i * ) (pbSrc + 0 ) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ) ) ) ); + if( cbData >= 48 ) + { + chain = MM_ADD_EPIXX( chain, chainIncrement1 ); + _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ) ) ) ); + if( cbData >= 64 ) + { + chain = MM_ADD_EPIXX( chain, chainIncrement1 ); + _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ) ) ) ); + } + } + } + else + { + // Exactly 1 block to process + c0 = chain; + chain = MM_ADD_EPIXX( chain, chainIncrement1 ); + + c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER ); + + AES_ENCRYPT_1( pExpandedKey, c0 ); + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0 ) ) ) ); + } + } + + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER ); + _mm_storeu_si128( (__m128i *) pbChainingValue, chain ); +} + +#endif // SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 diff --git a/libs/symcrypt/lib/aes-xmm.c b/libs/symcrypt/lib/aes-xmm.c new file mode 100644 index 00000000000..c300533281f --- /dev/null +++ b/libs/symcrypt/lib/aes-xmm.c @@ -0,0 +1,1792 @@ +// +// aes-xmm.c code for AES implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+// +// All XMM code for AES operations +// Requires compiler support for ssse3, aesni and pclmulqdq +// + +#include "precomp.h" +#include <string.h> + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#include "xtsaes_definitions.h" +#include "ghash_definitions.h" + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("ssse3,aes,pclmul"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("ssse3,aes,pclmul") +#endif + +// +// Run the 4 bytes at pIn through AESKEYGENASSIST with rcon = 0 and write the low 32 bits of the result to pOut. +// The input word is broadcast to every lane first, so whichever lane the instruction substitutes holds the input. +// +VOID +SYMCRYPT_CALL +SymCryptAes4SboxXmm( _In_reads_(4) PCBYTE pIn, _Out_writes_(4) PBYTE pOut ) +{ + __m128i x; + int word; + + // pIn and pOut are byte pointers: copy through an int rather than dereferencing a casted pointer, + // which would assume int/float alignment, violate the aliasing rules and drop the const qualifier. + memcpy( &word, pIn, sizeof( word ) ); + x = _mm_set1_epi32( word ); + + x = _mm_aeskeygenassist_si128( x, 0 ); + + // Store the low 32 bits of the result + word = _mm_cvtsi128_si32( x ); + memcpy( pOut, &word, sizeof( word ) ); +} + +// +// Convert one 16-byte encryption round key into the corresponding decryption round key with AESIMC. +// +VOID +SYMCRYPT_CALL +SymCryptAesCreateDecryptionRoundKeyXmm( + _In_reads_(16) PCBYTE pEncryptionRoundKey, + _Out_writes_(16) PBYTE pDecryptionRoundKey ) +{ + // + // On x86 our key structure is only 4-aligned (the best we can do) so we use unaligned load/stores. + // On Amd64 our round keys are aligned, but recent CPUs have fast unaligned load/store if the address is + // actually aligned properly. + // + _mm_storeu_si128( (__m128i *) pDecryptionRoundKey, _mm_aesimc_si128( _mm_loadu_si128( (__m128i *)pEncryptionRoundKey ) ) ); +} + +// +// The latency of AES instruction has increased up to 8 cycles in Ivy Bridge, +// and back to 7 in Haswell. +// We use 8-parallel code to expose the maximum parallelism to the CPU. +// On x86 it will introduce some register spilling, but the load/stores +// should be able to hide behind the AES instruction latencies. +// Silvermont x86 CPUs have AES-NI with latency = 8 and throughput = 5, so there +// the CPU parallelism is low. +// For things like BitLocker that is fine, but other uses, such as GCM & AES_CTR_DRBG +// use odd sizes.
+// We try to do 5-8 blocks in 8-parallel code, 2-4 blocks in 4-parallel code, and +// 1 block in 1-parallel code. +// This is a compromise; the big cores can do 8 parallel in about the time of a 4-parallel, +// but Silvermont cannot and would pay a big price on small requests if we only use 8-parallel. +// Doing only 8-parallel and then 1-parallel would penalize the big cores a lot. +// +// We used to have 7-parallel code, but common request sizes are not multiples of 7 +// blocks so we end up doing a lot of extra work. This is especially expensive on +// Silvermont where the extra work isn't hidden in the latencies. +// + +// +// AES_ENCRYPT_1 encrypts the single block c0 in place with pExpandedKey: +// XOR with RoundKey[0], one AESENC round, then pairs of AESENC rounds until keyPtr reaches lastEncRoundKey, +// which is applied with AESENCLAST. Round keys are read with unaligned loads. +// +#define AES_ENCRYPT_1( pExpandedKey, c0 ) \ +{ \ + const BYTE (*keyPtr)[4][4]; \ + const BYTE (*keyLimit)[4][4]; \ + __m128i roundkey; \ +\ + keyPtr = &pExpandedKey->RoundKey[0]; \ + keyLimit = pExpandedKey->lastEncRoundKey; \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ +\ + c0 = _mm_xor_si128( c0, roundkey ); \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = _mm_aesenc_si128( c0, roundkey ); \ +\ + do \ + { \ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = _mm_aesenc_si128( c0, roundkey ); \ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = _mm_aesenc_si128( c0, roundkey ); \ + } while( keyPtr < keyLimit ); \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ +\ + c0 = _mm_aesenclast_si128( c0, roundkey ); \ +}; + + +// Perform AES encryption without the first round key and with a specified last round key +// +// For algorithms where performance is dominated by a chain of dependent AES rounds (i.e. CBC encryption, CCM, CMAC) +// we can gain a reasonable performance uplift by computing (last round key ^ next plaintext block ^ first round key) +// off the critical path and using this computed value in place of last round key in AESENCLAST instructions.
//
// AES_ENCRYPT_1_CHAIN( pExpandedKey, cipherState, mergedLastRoundKey )
//
//  - cipherState        : __m128i lvalue, read and overwritten. The macro starts at
//                         RoundKey[1] and never XORs in RoundKey[0], so the caller must
//                         already have folded the first round key into cipherState.
//  - mergedLastRoundKey : __m128i value handed to AESENCLAST in place of the key stored at
//                         lastEncRoundKey (which this macro does not load). It appears
//                         exactly once in the expansion, so an expression argument is
//                         evaluated once.
//
// Like AES_ENCRYPT_1 it performs one AESENC round followed by pairs of AESENC rounds, and it
// declares its own block-scoped keyPtr, keyLimit and roundkey.
//
#define AES_ENCRYPT_1_CHAIN( pExpandedKey, cipherState, mergedLastRoundKey ) \
{ \
    const BYTE (*keyPtr)[4][4]; \
    const BYTE (*keyLimit)[4][4]; \
    __m128i roundkey; \
\
    keyPtr = &pExpandedKey->RoundKey[1]; \
    keyLimit = pExpandedKey->lastEncRoundKey; \
\
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
    keyPtr ++; \
\
    cipherState = _mm_aesenc_si128( cipherState, roundkey ); \
\
    do \
    { \
        roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
        keyPtr ++; \
        cipherState = _mm_aesenc_si128( cipherState, roundkey ); \
        roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
        keyPtr ++; \
        cipherState = _mm_aesenc_si128( cipherState, roundkey ); \
    } while( keyPtr < keyLimit ); \
\
    cipherState = _mm_aesenclast_si128( cipherState, mergedLastRoundKey ); \
};

//
// AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3 )
//
// Encrypts four independent blocks in place. Each round key is loaded once and applied to
// all four registers: XOR with the first key, one AESENC round per loop iteration until
// keyPtr reaches lastEncRoundKey, then AESENCLAST with the key stored there.
//
#define AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3 ) \
{ \
    const BYTE (*keyPtr)[4][4]; \
    const BYTE (*keyLimit)[4][4]; \
    __m128i roundkey; \
\
    keyPtr = &pExpandedKey->RoundKey[0]; \
    keyLimit = pExpandedKey->lastEncRoundKey; \
\
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
    keyPtr ++; \
\
    c0 = _mm_xor_si128( c0, roundkey ); \
    c1 = _mm_xor_si128( c1, roundkey ); \
    c2 = _mm_xor_si128( c2, roundkey ); \
    c3 = _mm_xor_si128( c3, roundkey ); \
\
    do \
    { \
        roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
        keyPtr ++; \
        c0 = _mm_aesenc_si128( c0, roundkey ); \
        c1 = _mm_aesenc_si128( c1, roundkey ); \
        c2 = _mm_aesenc_si128( c2, roundkey ); \
        c3 = _mm_aesenc_si128( c3, roundkey ); \
    } while( keyPtr < keyLimit ); \
\
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
\
    c0 = _mm_aesenclast_si128( c0, roundkey ); \
    c1 = _mm_aesenclast_si128( c1, roundkey ); \
    c2 = _mm_aesenclast_si128( c2, roundkey ); \
    c3 = _mm_aesenclast_si128( c3, roundkey ); \
};

//
// AES_ENCRYPT_8( pExpandedKey, c0, ..., c7 )
//
// Eight-block variant of AES_ENCRYPT_4: same key walk, each round key applied to all
// eight registers, results left in c0..c7.
//
#define AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ) \
{ \
    const BYTE (*keyPtr)[4][4]; \
    const BYTE (*keyLimit)[4][4]; \
    __m128i roundkey; \
\
    keyPtr = &pExpandedKey->RoundKey[0]; \
    keyLimit = pExpandedKey->lastEncRoundKey; \
\
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
    keyPtr ++; \
\
    c0 = _mm_xor_si128( c0, roundkey ); \
    c1 = _mm_xor_si128( c1, roundkey ); \
    c2 = _mm_xor_si128( c2, roundkey ); \
    c3 = _mm_xor_si128( c3, roundkey ); \
    c4 = _mm_xor_si128( c4, roundkey ); \
    c5 = _mm_xor_si128( c5, roundkey ); \
    c6 = _mm_xor_si128( c6, roundkey ); \
    c7 = _mm_xor_si128( c7, roundkey ); \
\
    do \
    { \
        roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
        keyPtr ++; \
        c0 = _mm_aesenc_si128( c0, roundkey ); \
        c1 = _mm_aesenc_si128( c1, roundkey ); \
        c2 = _mm_aesenc_si128( c2, roundkey ); \
        c3 = _mm_aesenc_si128( c3, roundkey ); \
        c4 = _mm_aesenc_si128( c4, roundkey ); \
        c5 = _mm_aesenc_si128( c5, roundkey ); \
        c6 = _mm_aesenc_si128( c6, roundkey ); \
        c7 = _mm_aesenc_si128( c7, roundkey ); \
    } while( keyPtr < keyLimit ); \
\
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
\
    c0 = _mm_aesenclast_si128( c0, roundkey ); \
    c1 = _mm_aesenclast_si128( c1, roundkey ); \
    c2 = _mm_aesenclast_si128( c2, roundkey ); \
    c3 = _mm_aesenclast_si128( c3, roundkey ); \
    c4 = _mm_aesenclast_si128( c4, roundkey ); \
    c5 = _mm_aesenclast_si128( c5, roundkey ); \
    c6 = _mm_aesenclast_si128( c6, roundkey ); \
    c7 = _mm_aesenclast_si128( c7, roundkey ); \
};

//
// AES_DECRYPT_1( pExpandedKey, c0 )
//
// Decrypts the single block held in c0, in place. The key walk starts at
// pExpandedKey->lastEncRoundKey (used for the initial XOR) and ends at
// pExpandedKey->lastDecRoundKey (used for AESDECLAST), with one AESDEC round followed by
// pairs of AESDEC rounds in between - the mirror image of AES_ENCRYPT_1.
// NOTE(review): this implies the decryption round keys are stored directly after the
// encryption keys, sharing the key at lastEncRoundKey - confirm against the key expansion.
//
#define AES_DECRYPT_1( pExpandedKey, c0 ) \
{ \
    const BYTE (*keyPtr)[4][4]; \
    const BYTE (*keyLimit)[4][4]; \
    __m128i roundkey; \
\
    keyPtr = pExpandedKey->lastEncRoundKey; \
    keyLimit = pExpandedKey->lastDecRoundKey; \
\
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
    keyPtr ++; \
\
    c0 = _mm_xor_si128( c0, roundkey ); \
\
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
    keyPtr ++; \
    c0 = _mm_aesdec_si128( c0, roundkey ); \
\
    do \
    { \
        roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
        keyPtr ++; \
        c0 = _mm_aesdec_si128( c0, roundkey ); \
        roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
        keyPtr ++; \
        c0 = _mm_aesdec_si128( c0, roundkey ); \
    } while( keyPtr < keyLimit ); \
\
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
\
    c0 = _mm_aesdeclast_si128( c0, roundkey ); \
};

//
// AES_DECRYPT_4( pExpandedKey, c0, c1, c2, c3 )
//
// Decrypts four independent blocks in place, walking the keys from lastEncRoundKey to
// lastDecRoundKey one AESDEC round per loop iteration.
//
#define AES_DECRYPT_4( pExpandedKey, c0, c1, c2, c3 ) \
{ \
    const BYTE (*keyPtr)[4][4]; \
    const BYTE (*keyLimit)[4][4]; \
    __m128i roundkey; \
\
    keyPtr = pExpandedKey->lastEncRoundKey; \
    keyLimit = pExpandedKey->lastDecRoundKey; \
\
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
    keyPtr ++; \
\
    c0 = _mm_xor_si128( c0, roundkey ); \
    c1 = _mm_xor_si128( c1, roundkey ); \
    c2 = _mm_xor_si128( c2, roundkey ); \
    c3 = _mm_xor_si128( c3, roundkey ); \
\
    do \
    { \
        roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
        keyPtr ++; \
        c0 = _mm_aesdec_si128( c0, roundkey ); \
        c1 = _mm_aesdec_si128( c1, roundkey ); \
        c2 = _mm_aesdec_si128( c2, roundkey ); \
        c3 = _mm_aesdec_si128( c3, roundkey ); \
    } while( keyPtr < keyLimit ); \
\
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
\
    c0 = _mm_aesdeclast_si128( c0, roundkey ); \
    c1 = _mm_aesdeclast_si128( c1, roundkey ); \
    c2 = _mm_aesdeclast_si128( c2, roundkey ); \
    c3 = _mm_aesdeclast_si128( c3, roundkey ); \
};

//
// AES_DECRYPT_8( pExpandedKey, c0, ..., c7 )
//
// Eight-block variant of AES_DECRYPT_4.
//
#define AES_DECRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ) \
{ \
    const BYTE (*keyPtr)[4][4]; \
    const BYTE (*keyLimit)[4][4]; \
    __m128i roundkey; \
\
    keyPtr = pExpandedKey->lastEncRoundKey; \
    keyLimit = pExpandedKey->lastDecRoundKey; \
\
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
    keyPtr ++; \
\
    c0 = _mm_xor_si128( c0, roundkey ); \
    c1 = _mm_xor_si128( c1, roundkey ); \
    c2 = _mm_xor_si128( c2, roundkey ); \
    c3 = _mm_xor_si128( c3, roundkey ); \
    c4 = _mm_xor_si128( c4, roundkey ); \
    c5 = _mm_xor_si128( c5, roundkey ); \
    c6 = _mm_xor_si128( c6, roundkey ); \
    c7 = _mm_xor_si128( c7, roundkey ); \
\
    do \
    { \
        roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
        keyPtr ++; \
        c0 = _mm_aesdec_si128( c0, roundkey ); \
        c1 = _mm_aesdec_si128( c1, roundkey ); \
        c2 = _mm_aesdec_si128( c2, roundkey ); \
        c3 = _mm_aesdec_si128( c3, roundkey ); \
        c4 = _mm_aesdec_si128( c4, roundkey ); \
        c5 = _mm_aesdec_si128( c5, roundkey ); \
        c6 = _mm_aesdec_si128( c6, roundkey ); \
        c7 = _mm_aesdec_si128( c7, roundkey ); \
    } while( keyPtr < keyLimit ); \
\
    roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \
\
    c0 = _mm_aesdeclast_si128( c0, roundkey ); \
    c1 = _mm_aesdeclast_si128( c1, roundkey ); \
    c2 = _mm_aesdeclast_si128( c2, roundkey ); \
    c3 = _mm_aesdeclast_si128( c3, roundkey ); \
    c4 = _mm_aesdeclast_si128( c4, roundkey ); \
    c5 = _mm_aesdeclast_si128( c5, roundkey ); \
    c6 = _mm_aesdeclast_si128( c6, roundkey ); \
    c7 = _mm_aesdeclast_si128( c7, roundkey ); \
};


//
// The EncryptXmm code is tested through the CFB mode encryption which has no further optimizations.
//
// Encrypts one block: an unaligned 16-byte load from pbSrc, AES_ENCRYPT_1 with the supplied
// expanded key, and an unaligned 16-byte store to pbDst. The source block is fully read
// before the store.
//
VOID
SYMCRYPT_CALL
SymCryptAesEncryptXmm(
    _In_                                    PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _In_reads_( SYMCRYPT_AES_BLOCK_SIZE )   PCBYTE                      pbSrc,
    _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE                       pbDst )
{
    __m128i c;

    c = _mm_loadu_si128( ( __m128i * ) pbSrc);

    AES_ENCRYPT_1( pExpandedKey, c );

    _mm_storeu_si128( (__m128i *) pbDst, c );
}

//
// The DecryptXmm code is tested through the EcbDecrypt calls which has no further optimizations.
//
// Decrypts one block: an unaligned 16-byte load from pbSrc, AES_DECRYPT_1 with the supplied
// expanded key, and an unaligned 16-byte store to pbDst. The source block is fully read
// before the store.
//
VOID
SYMCRYPT_CALL
SymCryptAesDecryptXmm(
    _In_                                    PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _In_reads_( SYMCRYPT_AES_BLOCK_SIZE )   PCBYTE                      pbSrc,
    _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE                       pbDst )
{
    __m128i c;

    c = _mm_loadu_si128( ( __m128i * ) pbSrc);

    AES_DECRYPT_1( pExpandedKey, c );

    _mm_storeu_si128( (__m128i *) pbDst, c );
}

// Disable warnings and VC++ runtime checks for use of uninitialized values (by design)
#pragma warning(push)
#pragma warning( disable: 6001 4701 )
#pragma runtime_checks( "u", off )
//
// ECB-encrypts every whole 16-byte block of pbSrc into pbDst.
//
// Full groups of 8 blocks go through AES_ENCRYPT_8. For the remaining 1..7 blocks only the
// blocks that exist are loaded, one of the 8-/4-/1-way macros is run, and only the blocks
// that exist are stored. Registers that were not loaded are encrypted as well (hence the
// pragmas above) but their contents are never written out.
// A trailing partial block (cbData % 16 bytes) is neither read nor written.
//
VOID
SYMCRYPT_CALL
SymCryptAesEcbEncryptXmm(
    _In_                        PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _In_reads_( cbData )        PCBYTE                      pbSrc,
    _Out_writes_( cbData )      PBYTE                       pbDst,
                                SIZE_T                      cbData )
{
    __m128i c0, c1, c2, c3, c4, c5, c6, c7;

    // Bulk path: 8 blocks (128 bytes) per iteration; all loads precede all stores.
    while( cbData >= 8 * SYMCRYPT_AES_BLOCK_SIZE )
    {
        c0 = _mm_loadu_si128( ( __m128i * ) (pbSrc +  0 ));
        c1 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ));
        c2 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ));
        c3 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ));
        c4 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 64 ));
        c5 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 80 ));
        c6 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 96 ));
        c7 = _mm_loadu_si128( ( __m128i * ) (pbSrc +112 ));

        AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 );

        _mm_storeu_si128( (__m128i *) (pbDst +  0 ), c0 );
        _mm_storeu_si128( (__m128i *) (pbDst + 16 ), c1 );
        _mm_storeu_si128( (__m128i *) (pbDst + 32 ), c2 );
        _mm_storeu_si128( (__m128i *) (pbDst + 48 ), c3 );
        _mm_storeu_si128( (__m128i *) (pbDst + 64 ), c4 );
        _mm_storeu_si128( (__m128i *) (pbDst + 80 ), c5 );
        _mm_storeu_si128( (__m128i *) (pbDst + 96 ), c6 );
        _mm_storeu_si128( (__m128i *) (pbDst +112 ), c7 );

        pbSrc  += 8 * SYMCRYPT_AES_BLOCK_SIZE;
        pbDst  += 8 * SYMCRYPT_AES_BLOCK_SIZE;
        cbData -= 8 * SYMCRYPT_AES_BLOCK_SIZE;
    }

    // Nothing (or only a partial block) left.
    if( cbData < 16 )
    {
        return;
    }

    // Tail: at most 7 whole blocks remain, so c7 is never loaded here.
    c0 = _mm_loadu_si128( ( __m128i * ) (pbSrc +  0 ));
    if( cbData >= 32 )
    {
        c1 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 16 ));
        if( cbData >= 48 )
        {
            c2 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 32 ));
            if( cbData >= 64 )
            {
                c3 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 48 ));
                if( cbData >= 80 )
                {
                    c4 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 64 ));
                    if( cbData >= 96 )
                    {
                        c5 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 80 ));
                        if( cbData >= 112 )
                        {
                            c6 = _mm_loadu_si128( ( __m128i * ) (pbSrc + 96 ));
                        }
                    }
                }
            }
        }
    }

    // 5-7 blocks -> 8-way, 2-4 blocks -> 4-way, 1 block -> 1-way.
    if( cbData >= 5 * SYMCRYPT_AES_BLOCK_SIZE )
    {
        AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 );
    }
    else if( cbData >= 2 * SYMCRYPT_AES_BLOCK_SIZE )
    {
        AES_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3 );
    }
    else
    {
        AES_ENCRYPT_1( pExpandedKey, c0 );
    }

    // Store exactly the blocks that were loaded above.
    _mm_storeu_si128( (__m128i *) (pbDst +  0 ), c0 );
    if( cbData >= 32 )
    {
        _mm_storeu_si128( (__m128i *) (pbDst + 16 ), c1 );
        if( cbData >= 48 )
        {
            _mm_storeu_si128( (__m128i *) (pbDst + 32 ), c2 );
            if( cbData >= 64 )
            {
                _mm_storeu_si128( (__m128i *) (pbDst + 48 ), c3 );
                if( cbData >= 80 )
                {
                    _mm_storeu_si128( (__m128i *) (pbDst + 64 ), c4 );
                    if( cbData >= 96 )
                    {
                        _mm_storeu_si128( (__m128i *) (pbDst + 80 ), c5 );
                        if( cbData >= 112 )
                        {
                            _mm_storeu_si128( (__m128i *) (pbDst + 96 ), c6 );
                        }
                    }
                }
            }
        }
    }
}
#pragma runtime_checks( "u", restore )
#pragma warning( pop )



//
// CBC-encrypts every whole 16-byte block of pbSrc into pbDst and writes the last ciphertext
// block back to pbChainingValue.
//
// If cbData is smaller than one block the function returns without writing anything
// (pbChainingValue and pbDst are left untouched). A trailing partial block is ignored.
//
// Throughout the loop c holds (previous ciphertext ^ next plaintext ^ first round key),
// i.e. the AES state after the initial key XOR, so each block only needs
// AES_ENCRYPT_1_CHAIN. The real ciphertext of the previous block is recovered for the
// store as c ^ d.
//
VOID
SYMCRYPT_CALL
SymCryptAesCbcEncryptXmm(
    _In_                                        PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE )  PBYTE                       pbChainingValue,
    _In_reads_( cbData )                        PCBYTE                      pbSrc,
    _Out_writes_( cbData )                      PBYTE                       pbDst,
                                                SIZE_T                      cbData )
{
    __m128i c = _mm_loadu_si128( (__m128i *) pbChainingValue );
    __m128i rk0 = _mm_loadu_si128( (__m128i *) &pExpandedKey->RoundKey[0] );
    __m128i rkLast = _mm_loadu_si128( (__m128i *) pExpandedKey->lastEncRoundKey );
    __m128i d;

    if (cbData < SYMCRYPT_AES_BLOCK_SIZE)
        return;

    // This algorithm is dominated by chain of dependent AES rounds, so we want to avoid XOR
    // instructions on the critical path where possible
    // We can compute (last round key ^ next plaintext block ^ first round key) off the critical
    // path and use this with AES_ENCRYPT_1_CHAIN so that only AES instructions write to c in
    // the main loop
    d = _mm_xor_si128( _mm_loadu_si128( (__m128i *) pbSrc ), rk0 );
    c = _mm_xor_si128( c, d );
    pbSrc += SYMCRYPT_AES_BLOCK_SIZE;
    cbData -= SYMCRYPT_AES_BLOCK_SIZE;

    // pbSrc runs one block ahead of pbDst: each iteration reads plaintext block i+1 and only
    // then stores ciphertext block i.
    while( cbData >= SYMCRYPT_AES_BLOCK_SIZE )
    {
        d = _mm_xor_si128( _mm_loadu_si128( (__m128i *) pbSrc ), rk0 );
        AES_ENCRYPT_1_CHAIN( pExpandedKey, c, _mm_xor_si128(d, rkLast ) );
        _mm_storeu_si128( (__m128i *) pbDst, _mm_xor_si128(c, d) );

        pbSrc += SYMCRYPT_AES_BLOCK_SIZE;
        pbDst += SYMCRYPT_AES_BLOCK_SIZE;
        cbData -= SYMCRYPT_AES_BLOCK_SIZE;
    }
    // Last block: no following plaintext to fold in, so finish with the plain last round key.
    AES_ENCRYPT_1_CHAIN( pExpandedKey, c, rkLast );
    _mm_storeu_si128( (__m128i *) pbDst, c );
    _mm_storeu_si128( (__m128i *) pbChainingValue, c );
}

// Disable warnings and VC++ runtime checks for use of uninitialized values (by design)
#pragma warning(push)
#pragma warning( disable: 6001 4701 )
#pragma runtime_checks( "u", off )
//
// CBC-decrypts the whole blocks of pbSrc into pbDst and updates pbChainingValue with a
// ciphertext block taken from pbSrc.
//
// c0..c7 carry the blocks through the AES decryption; d0..d7 keep untouched copies of the
// ciphertext, which are XORed into the following block's result and, for the last block,
// become the new chaining value. Within each group every ciphertext load precedes the
// stores to pbDst.
// If cbData is smaller than one block the function returns without touching any buffer.
// NOTE(review): the tail reloads the chaining value from pbSrc + cbData - 16, which is the
// last ciphertext block only when cbData is a multiple of the block size - confirm callers
// guarantee that.
//
VOID
SYMCRYPT_CALL
SymCryptAesCbcDecryptXmm(
    _In_                                        PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE )  PBYTE                       pbChainingValue,
    _In_reads_( cbData )                        PCBYTE                      pbSrc,
    _Out_writes_( cbData )                      PBYTE                       pbDst,
                                                SIZE_T                      cbData )
{
    __m128i chain;
    __m128i c0, c1, c2, c3, c4, c5, c6, c7;
    __m128i d0, d1, d2, d3, d4, d5, d6, d7;

    if( cbData < SYMCRYPT_AES_BLOCK_SIZE )
    {
        return;
    }

    chain = _mm_loadu_si128( (__m128i *) pbChainingValue );

    //
    // First we do all multiples of 8 blocks
    //

    while( cbData >= 8 * SYMCRYPT_AES_BLOCK_SIZE )
    {
        d0 = c0 = _mm_loadu_si128( (__m128i *) (pbSrc + 0 * SYMCRYPT_AES_BLOCK_SIZE ) );
        d1 = c1 = _mm_loadu_si128( (__m128i *) (pbSrc + 1 * SYMCRYPT_AES_BLOCK_SIZE ) );
        d2 = c2 = _mm_loadu_si128( (__m128i *) (pbSrc + 2 * SYMCRYPT_AES_BLOCK_SIZE ) );
        d3 = c3 = _mm_loadu_si128( (__m128i *) (pbSrc + 3 * SYMCRYPT_AES_BLOCK_SIZE ) );
        d4 = c4 = _mm_loadu_si128( (__m128i *) (pbSrc + 4 * SYMCRYPT_AES_BLOCK_SIZE ) );
        d5 = c5 = _mm_loadu_si128( (__m128i *) (pbSrc + 5 * SYMCRYPT_AES_BLOCK_SIZE ) );
        d6 = c6 = _mm_loadu_si128( (__m128i *) (pbSrc + 6 * SYMCRYPT_AES_BLOCK_SIZE ) );
        d7 = c7 = _mm_loadu_si128( (__m128i *) (pbSrc + 7 * SYMCRYPT_AES_BLOCK_SIZE ) );

        AES_DECRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 );

        // plaintext[i] = Dec( ciphertext[i] ) ^ ciphertext[i-1]; the first block uses the chain.
        c0 = _mm_xor_si128( c0, chain );
        c1 = _mm_xor_si128( c1, d0 );
        c2 = _mm_xor_si128( c2, d1 );
        c3 = _mm_xor_si128( c3, d2 );
        c4 = _mm_xor_si128( c4, d3 );
        c5 = _mm_xor_si128( c5, d4 );
        c6 = _mm_xor_si128( c6, d5 );
        c7 = _mm_xor_si128( c7, d6 );
        chain = d7;

        _mm_storeu_si128( (__m128i *) (pbDst + 0 * SYMCRYPT_AES_BLOCK_SIZE ), c0 );
        _mm_storeu_si128( (__m128i *) (pbDst + 1 * SYMCRYPT_AES_BLOCK_SIZE ), c1 );
        _mm_storeu_si128( (__m128i *) (pbDst + 2 * SYMCRYPT_AES_BLOCK_SIZE ), c2 );
        _mm_storeu_si128( (__m128i *) (pbDst + 3 * SYMCRYPT_AES_BLOCK_SIZE ), c3 );
        _mm_storeu_si128( (__m128i *) (pbDst + 4 * SYMCRYPT_AES_BLOCK_SIZE ), c4 );
        _mm_storeu_si128( (__m128i *) (pbDst + 5 * SYMCRYPT_AES_BLOCK_SIZE ), c5 );
        _mm_storeu_si128( (__m128i *) (pbDst + 6 * SYMCRYPT_AES_BLOCK_SIZE ), c6 );
        _mm_storeu_si128( (__m128i *) (pbDst + 7 * SYMCRYPT_AES_BLOCK_SIZE ), c7 );

        pbSrc  += 8 * SYMCRYPT_AES_BLOCK_SIZE;
        pbDst  += 8 * SYMCRYPT_AES_BLOCK_SIZE;
        cbData -= 8 * SYMCRYPT_AES_BLOCK_SIZE;
    }

    if( cbData >= 16 )
    {
        //
        // There is remaining work to be done
        //
        d0 = c0 = _mm_loadu_si128( (__m128i *) (pbSrc + 0 * SYMCRYPT_AES_BLOCK_SIZE ) );
        if( cbData >= 32 )
        {
            d1 = c1 = _mm_loadu_si128( (__m128i *) (pbSrc + 1 * SYMCRYPT_AES_BLOCK_SIZE ) );
            if( cbData >= 48 )
            {
                d2 = c2 = _mm_loadu_si128( (__m128i *) (pbSrc + 2 * SYMCRYPT_AES_BLOCK_SIZE ) );
                if( cbData >= 64 )
                {
                    d3 = c3 = _mm_loadu_si128( (__m128i *) (pbSrc + 3 * SYMCRYPT_AES_BLOCK_SIZE ) );
                    if( cbData >= 80 )
                    {
                        d4 = c4 = _mm_loadu_si128( (__m128i *) (pbSrc + 4 * SYMCRYPT_AES_BLOCK_SIZE ) );
                        if( cbData >= 96 )
                        {
                            d5 = c5 = _mm_loadu_si128( (__m128i *) (pbSrc + 5 * SYMCRYPT_AES_BLOCK_SIZE ) );
                            if( cbData >= 112 )
                            {
                                d6 = c6 = _mm_loadu_si128( (__m128i *) (pbSrc + 6 * SYMCRYPT_AES_BLOCK_SIZE ) );
                            }
                        }
                    }
                }
            }
        }

        //
        // Decrypt 1, 4, or 8 blocks in AES-CBC mode. This might decrypt uninitialized registers,
        // but those will not be used when we store the results.
        //
        if( cbData > 4 * SYMCRYPT_AES_BLOCK_SIZE )
        {
            AES_DECRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 );
            c0 = _mm_xor_si128( c0, chain );
            c1 = _mm_xor_si128( c1, d0 );
            c2 = _mm_xor_si128( c2, d1 );
            c3 = _mm_xor_si128( c3, d2 );
            c4 = _mm_xor_si128( c4, d3 );
            c5 = _mm_xor_si128( c5, d4 );
            c6 = _mm_xor_si128( c6, d5 );
        }
        else if( cbData > SYMCRYPT_AES_BLOCK_SIZE )
        {
            AES_DECRYPT_4( pExpandedKey, c0, c1, c2, c3 );
            c0 = _mm_xor_si128( c0, chain );
            c1 = _mm_xor_si128( c1, d0 );
            c2 = _mm_xor_si128( c2, d1 );
            c3 = _mm_xor_si128( c3, d2 );
        } else
        {
            AES_DECRYPT_1( pExpandedKey, c0 );
            c0 = _mm_xor_si128( c0, chain );
        }

        // Pick up the new chaining value from the source before any store to pbDst below.
        chain = _mm_loadu_si128( (__m128i *) (pbSrc + cbData - SYMCRYPT_AES_BLOCK_SIZE ) );
        _mm_storeu_si128( (__m128i *) (pbDst + 0 * SYMCRYPT_AES_BLOCK_SIZE ), c0 );
        if( cbData >= 32 )
        {
            _mm_storeu_si128( (__m128i *) (pbDst + 1 * SYMCRYPT_AES_BLOCK_SIZE ), c1 );
            if( cbData >= 48 )
            {
                _mm_storeu_si128( (__m128i *) (pbDst + 2 * SYMCRYPT_AES_BLOCK_SIZE ), c2 );
                if( cbData >= 64 )
                {
                    _mm_storeu_si128( (__m128i *) (pbDst + 3 * SYMCRYPT_AES_BLOCK_SIZE ), c3 );
                    if( cbData >= 80 )
                    {
                        _mm_storeu_si128( (__m128i *) (pbDst + 4 * SYMCRYPT_AES_BLOCK_SIZE ), c4 );
                        if( cbData >= 96 )
                        {
                            _mm_storeu_si128( (__m128i *) (pbDst + 5 * SYMCRYPT_AES_BLOCK_SIZE ), c5 );
                            if( cbData >= 112 )
                            {
                                _mm_storeu_si128( (__m128i *) (pbDst + 6 * SYMCRYPT_AES_BLOCK_SIZE ), c6 );
                            }
                        }
                    }
                }
            }
        }
    }

    _mm_storeu_si128( (__m128i *) pbChainingValue, chain );

    return;
}
#pragma runtime_checks( "u", restore )
#pragma warning( pop )

//
// CBC-MAC update: absorbs every whole 16-byte block of pbData into pbChainingValue.
//
// Same chaining as SymCryptAesCbcEncryptXmm, except that no ciphertext is written; only the
// final state is stored back to pbChainingValue. If cbData is smaller than one block the
// function returns without writing; a trailing partial block is ignored.
//
VOID
SYMCRYPT_CALL
SymCryptAesCbcMacXmm(
    _In_                                        PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE )  PBYTE                       pbChainingValue,
    _In_reads_( cbData )                        PCBYTE                      pbData,
                                                SIZE_T                      cbData )
{
    __m128i c = _mm_loadu_si128( (__m128i *) pbChainingValue );
    __m128i rk0 = _mm_loadu_si128( (__m128i *) &pExpandedKey->RoundKey[0] );
    __m128i rkLast = _mm_loadu_si128( (__m128i *) pExpandedKey->lastEncRoundKey );
    __m128i d, rk0AndLast;

    if (cbData < SYMCRYPT_AES_BLOCK_SIZE)
        return;

    // This algorithm is dominated by chain of dependent AES rounds, so we want to avoid XOR
    // instructions on the critical path where possible
    // We can compute (last round key ^ next plaintext block ^ first round key) off the critical
    // path and use this with AES_ENCRYPT_1_CHAIN so that only AES instructions write to c in
    // the main loop
    d = _mm_xor_si128( _mm_loadu_si128( (__m128i *) pbData ), rk0 );
    c = _mm_xor_si128( c, d );
    pbData += SYMCRYPT_AES_BLOCK_SIZE;
    cbData -= SYMCRYPT_AES_BLOCK_SIZE;

    // As we don't compute ciphertext here, we only need to XOR rk0 and rkLast once
    rk0AndLast = _mm_xor_si128( rk0, rkLast );

    while( cbData >= SYMCRYPT_AES_BLOCK_SIZE )
    {
        // d = next block ^ first round key ^ last round key, used as the merged last round key.
        d = _mm_xor_si128( _mm_loadu_si128( (__m128i *) pbData ), rk0AndLast );
        AES_ENCRYPT_1_CHAIN( pExpandedKey, c, d );

        pbData += SYMCRYPT_AES_BLOCK_SIZE;
        cbData -= SYMCRYPT_AES_BLOCK_SIZE;
    }
    AES_ENCRYPT_1_CHAIN( pExpandedKey, c, rkLast );
    _mm_storeu_si128( (__m128i *) pbChainingValue, c );
}


#pragma warning(push)
#pragma warning( disable:4701 )  // "Use of uninitialized variable"
#pragma runtime_checks( "u", off )

// Instantiate the code in aes-pattern.c under the name SymCryptAesCtrMsb64Xmm, with
// MM_ADD_EPIXX / MM_SUB_EPIXX bound to the 64-bit lane add/sub intrinsics.
// NOTE(review): presumably aes-pattern.c is the shared CTR-mode body and these select the
// counter width - the included file is not visible here, confirm there.
#define SYMCRYPT_AesCtrMsbXxXmm     SymCryptAesCtrMsb64Xmm
#define MM_ADD_EPIXX                _mm_add_epi64
#define MM_SUB_EPIXX                _mm_sub_epi64

#include "aes-pattern.c"

#undef MM_SUB_EPIXX
#undef MM_ADD_EPIXX
+#undef SYMCRYPT_AesCtrMsbXxXmm + +#define SYMCRYPT_AesCtrMsbXxXmm SymCryptAesCtrMsb32Xmm +#define MM_ADD_EPIXX _mm_add_epi32 +#define MM_SUB_EPIXX _mm_sub_epi32 + +#include "aes-pattern.c" + +#undef MM_SUB_EPIXX +#undef MM_ADD_EPIXX +#undef SYMCRYPT_AesCtrMsbXxXmm + +#pragma runtime_checks( "u", restore ) +#pragma warning(pop) + +/* + if( cbData >= 16 ) + { + if( cbData >= 32 ) + { + if( cbData >= 48 ) + { + if( cbData >= 64 ) + { + if( cbData >= 80 ) + { + if( cbData >= 96 ) + { + if( cbData >= 112 ) + { + } + } + } + } + } + } + } +*/ + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptDataUnitXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE pbScratch, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __m128i t0; + __m128i c0, c1, c2, c3, c4, c5, c6, c7; + __m128i roundkey, firstRoundKey, lastRoundKey; + __m128i XTS_ALPHA_MASK = _mm_set_epi32( 1, 1, 1, 0x87 ); + SYMCRYPT_GF128_ELEMENT* tweakBuffer = (SYMCRYPT_GF128_ELEMENT*) pbScratch; + + const BYTE (*keyPtr)[4][4]; + const BYTE (*keyLimit)[4][4] = pExpandedKey->lastEncRoundKey; + UINT64 lastTweakLow, lastTweakHigh; + int aesEncryptXtsLoop; + + SIZE_T cbDataMain; // number of bytes to handle in the main loop + SIZE_T cbDataTail; // number of bytes to handle in the tail loop + + SYMCRYPT_ASSERT(cbData >= SYMCRYPT_AES_BLOCK_SIZE); + + // To simplify logic and unusual size processing, we handle all + // data not a multiple of 8 blocks in the tail loop + cbDataTail = cbData & ((8*SYMCRYPT_AES_BLOCK_SIZE)-1); + // Additionally, so that ciphertext stealing logic does not rely on + // reading back from the destination buffer, when we have a non-zero + // tail, we ensure that we handle at least 1 whole block in the tail + // + // Note that our caller has ensured we have at least 1 whole block + // to process, this is checked in debug build + // This means that 
cbDataTail is in [1,15] at this point iff there are + // at least 8 whole blocks to process; so the below does not cause + // cbDataTail or cbDataMain to exceed cbData + cbDataTail += ((cbDataTail > 0) && (cbDataTail < SYMCRYPT_AES_BLOCK_SIZE)) ? (8*SYMCRYPT_AES_BLOCK_SIZE) : 0; + cbDataMain = cbData - cbDataTail; + + SYMCRYPT_ASSERT(cbDataMain <= cbData); + SYMCRYPT_ASSERT(cbDataTail <= cbData); + SYMCRYPT_ASSERT((cbDataMain & ((8*SYMCRYPT_AES_BLOCK_SIZE)-1)) == 0); + + c0 = _mm_loadu_si128( (__m128i *) pbTweakBlock ); + XTS_MUL_ALPHA( c0, c1 ); + XTS_MUL_ALPHA( c1, c2 ); + XTS_MUL_ALPHA( c2, c3 ); + + XTS_MUL_ALPHA4( c0, c4 ); + XTS_MUL_ALPHA ( c4, c5 ); + XTS_MUL_ALPHA ( c5, c6 ); + XTS_MUL_ALPHA ( c6, c7 ); + + tweakBuffer[0].m128i = c0; + tweakBuffer[1].m128i = c1; + tweakBuffer[2].m128i = c2; + tweakBuffer[3].m128i = c3; + tweakBuffer[4].m128i = c4; + tweakBuffer[5].m128i = c5; + tweakBuffer[6].m128i = c6; + tweakBuffer[7].m128i = c7; + lastTweakLow = tweakBuffer[7].ull[0]; + lastTweakHigh = tweakBuffer[7].ull[1]; + + firstRoundKey = _mm_loadu_si128( (__m128i *) &pExpandedKey->RoundKey[0] ); + lastRoundKey = _mm_loadu_si128( (__m128i *) pExpandedKey->lastEncRoundKey ); + + while( cbDataMain > 0 ) + { + // At loop entry, tweakBuffer[0-7] are tweakValues for the next 8 blocks + c0 = _mm_xor_si128( tweakBuffer[0].m128i, firstRoundKey ); + c1 = _mm_xor_si128( tweakBuffer[1].m128i, firstRoundKey ); + c2 = _mm_xor_si128( tweakBuffer[2].m128i, firstRoundKey ); + c3 = _mm_xor_si128( tweakBuffer[3].m128i, firstRoundKey ); + c4 = _mm_xor_si128( tweakBuffer[4].m128i, firstRoundKey ); + c5 = _mm_xor_si128( tweakBuffer[5].m128i, firstRoundKey ); + c6 = _mm_xor_si128( tweakBuffer[6].m128i, firstRoundKey ); + c7 = _mm_xor_si128( tweakBuffer[7].m128i, firstRoundKey ); + + c0 = _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ); + c1 = _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16) ) ); + c2 = _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) 
(pbSrc + 32) ) ); + c3 = _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48) ) ); + c4 = _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64) ) ); + c5 = _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80) ) ); + c6 = _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96) ) ); + c7 = _mm_xor_si128( c7, _mm_loadu_si128( ( __m128i * ) (pbSrc + 112) ) ); + + keyPtr = &pExpandedKey->RoundKey[1]; + + // Do 8 full rounds (AES-128|AES-192|AES-256) with stitched XTS (performed in scalar registers) + for( aesEncryptXtsLoop = 0; aesEncryptXtsLoop < 8; aesEncryptXtsLoop++ ) + { + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); + keyPtr ++; + c0 = _mm_aesenc_si128( c0, roundkey ); + c1 = _mm_aesenc_si128( c1, roundkey ); + c2 = _mm_aesenc_si128( c2, roundkey ); + c3 = _mm_aesenc_si128( c3, roundkey ); + c4 = _mm_aesenc_si128( c4, roundkey ); + c5 = _mm_aesenc_si128( c5, roundkey ); + c6 = _mm_aesenc_si128( c6, roundkey ); + c7 = _mm_aesenc_si128( c7, roundkey ); + + // Prepare tweakBuffer[8-15] with tweak^lastRoundKey + tweakBuffer[ 8+aesEncryptXtsLoop ].m128i = _mm_xor_si128( tweakBuffer[ aesEncryptXtsLoop ].m128i, lastRoundKey ); + // Prepare tweakBuffer[0-7] with tweaks for next 8 blocks + XTS_MUL_ALPHA_Scalar( lastTweakLow, lastTweakHigh ); + tweakBuffer[ aesEncryptXtsLoop ].ull[0] = lastTweakLow; + tweakBuffer[ aesEncryptXtsLoop ].ull[1] = lastTweakHigh; + } + + do + { + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); + keyPtr ++; + c0 = _mm_aesenc_si128( c0, roundkey ); + c1 = _mm_aesenc_si128( c1, roundkey ); + c2 = _mm_aesenc_si128( c2, roundkey ); + c3 = _mm_aesenc_si128( c3, roundkey ); + c4 = _mm_aesenc_si128( c4, roundkey ); + c5 = _mm_aesenc_si128( c5, roundkey ); + c6 = _mm_aesenc_si128( c6, roundkey ); + c7 = _mm_aesenc_si128( c7, roundkey ); + } while( keyPtr < keyLimit ); + + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_aesenclast_si128( c0, tweakBuffer[ 8].m128i ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 16), 
_mm_aesenclast_si128( c1, tweakBuffer[ 9].m128i ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_aesenclast_si128( c2, tweakBuffer[10].m128i ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_aesenclast_si128( c3, tweakBuffer[11].m128i ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_aesenclast_si128( c4, tweakBuffer[12].m128i ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_aesenclast_si128( c5, tweakBuffer[13].m128i ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_aesenclast_si128( c6, tweakBuffer[14].m128i ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 112), _mm_aesenclast_si128( c7, tweakBuffer[15].m128i ) ); + + pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE; + pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE; + cbDataMain -= 8 * SYMCRYPT_AES_BLOCK_SIZE; + } + + if( cbDataTail == 0 ) + { + return; // <-- expected case; early return here + } + + // Rare case, with data unit length not being multiple of 128 bytes, handle the tail one block at a time + t0 = tweakBuffer[0].m128i; + + while( cbDataTail >= 2*SYMCRYPT_AES_BLOCK_SIZE ) + { + c0 = _mm_xor_si128( _mm_loadu_si128( ( __m128i * ) pbSrc ), t0 ); + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + AES_ENCRYPT_1( pExpandedKey, c0 ); + _mm_storeu_si128( (__m128i *) pbDst, _mm_xor_si128( c0, t0 ) ); + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + XTS_MUL_ALPHA( t0, t0 ); + cbDataTail -= SYMCRYPT_AES_BLOCK_SIZE; + } + + if( cbDataTail > SYMCRYPT_AES_BLOCK_SIZE ) + { + // Ciphertext stealing encryption + // + // +--------------+ + // | | + // | V + // +-----------------+ | +-----+-----------+ + // | P_m-1 | | | P_m |++++CP+++++| + // +-----------------+ | +-----+-----------+ + // | | | + // enc_m-1 | enc_m + // | | | + // V | V + // +-----+-----------+ | +-----------------+ + // | C_m |++++CP+++++|--+ | C_m-1 | + // +-----+-----------+ +-----------------+ + // | / + // +---------------- / --+ + // / | + // | V + // +-----------------+ | +-----+ + // | C_m-1 |<-+ | C_m | + // +-----------------+ +-----+ + + // Encrypt penultimate 
plaintext block into tweakBuffer[0] + c0 = _mm_xor_si128( _mm_loadu_si128( (__m128i *) pbSrc ), t0 ); + AES_ENCRYPT_1( pExpandedKey, c0 ); + tweakBuffer[0].m128i = _mm_xor_si128( c0, t0 ); + + cbDataTail -= SYMCRYPT_AES_BLOCK_SIZE; + + // Copy tweakBuffer[0] to tweakBuffer[1] + tweakBuffer[1].m128i = tweakBuffer[0].m128i; + // Copy final plaintext bytes to prefix of tweakBuffer[0] - we must read before writing to support in-place encryption + memcpy( &tweakBuffer[0].ul[0], pbSrc + SYMCRYPT_AES_BLOCK_SIZE, cbDataTail ); + // Copy prefix of tweakBuffer[1] to the right place in the destination buffer + memcpy( pbDst + SYMCRYPT_AES_BLOCK_SIZE, &tweakBuffer[1].ul[0], cbDataTail ); + + // Do final tweak update + XTS_MUL_ALPHA( t0, t0 ); + + // Load updated tweakBuffer[0] into c0 + c0 = tweakBuffer[0].m128i; + } else { + // Just load final plaintext block into c0 + c0 = _mm_loadu_si128( (__m128i*) pbSrc ); + } + + // Final full block encryption + c0 = _mm_xor_si128( c0, t0 ); + AES_ENCRYPT_1( pExpandedKey, c0 ); + _mm_storeu_si128( (__m128i *) pbDst, _mm_xor_si128( c0, t0 ) ); +} + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE pbScratch, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __m128i t0; + __m128i c0, c1, c2, c3, c4, c5, c6, c7; + __m128i roundkey, firstRoundKey, lastRoundKey; + __m128i XTS_ALPHA_MASK = _mm_set_epi32( 1, 1, 1, 0x87 ); + SYMCRYPT_GF128_ELEMENT* tweakBuffer = (SYMCRYPT_GF128_ELEMENT*) pbScratch; + + const BYTE (*keyPtr)[4][4]; + const BYTE (*keyLimit)[4][4] = pExpandedKey->lastDecRoundKey; + UINT64 lastTweakLow, lastTweakHigh; + int aesDecryptXtsLoop; + + SIZE_T cbDataMain; // number of bytes to handle in the main loop + SIZE_T cbDataTail; // number of bytes to handle in the tail loop + + SYMCRYPT_ASSERT(cbData >= 
SYMCRYPT_AES_BLOCK_SIZE); + + // To simplify logic and unusual size processing, we handle all + // data not a multiple of 8 blocks in the tail loop + cbDataTail = cbData & ((8*SYMCRYPT_AES_BLOCK_SIZE)-1); + // Additionally, so that ciphertext stealing logic does not rely on + // reading back from the destination buffer, when we have a non-zero + // tail, we ensure that we handle at least 1 whole block in the tail + // + // Note that our caller has ensured we have at least 1 whole block + // to process, this is checked in debug build + // This means that cbDataTail is in [1,15] at this point iff there are + // at least 8 whole blocks to process; so the below does not cause + // cbDataTail or cbDataMain to exceed cbData + cbDataTail += ((cbDataTail > 0) && (cbDataTail < SYMCRYPT_AES_BLOCK_SIZE)) ? (8*SYMCRYPT_AES_BLOCK_SIZE) : 0; + cbDataMain = cbData - cbDataTail; + + SYMCRYPT_ASSERT(cbDataMain <= cbData); + SYMCRYPT_ASSERT(cbDataTail <= cbData); + SYMCRYPT_ASSERT((cbDataMain & ((8*SYMCRYPT_AES_BLOCK_SIZE)-1)) == 0); + + c0 = _mm_loadu_si128( (__m128i *) pbTweakBlock ); + XTS_MUL_ALPHA( c0, c1 ); + XTS_MUL_ALPHA( c1, c2 ); + XTS_MUL_ALPHA( c2, c3 ); + + XTS_MUL_ALPHA4( c0, c4 ); + XTS_MUL_ALPHA ( c4, c5 ); + XTS_MUL_ALPHA ( c5, c6 ); + XTS_MUL_ALPHA ( c6, c7 ); + + tweakBuffer[0].m128i = c0; + tweakBuffer[1].m128i = c1; + tweakBuffer[2].m128i = c2; + tweakBuffer[3].m128i = c3; + tweakBuffer[4].m128i = c4; + tweakBuffer[5].m128i = c5; + tweakBuffer[6].m128i = c6; + tweakBuffer[7].m128i = c7; + lastTweakLow = tweakBuffer[7].ull[0]; + lastTweakHigh = tweakBuffer[7].ull[1]; + + firstRoundKey = _mm_loadu_si128( (__m128i *) pExpandedKey->lastEncRoundKey ); + lastRoundKey = _mm_loadu_si128( (__m128i *) pExpandedKey->lastDecRoundKey ); + + while( cbDataMain > 0 ) + { + // At loop entry, tweakBuffer[0-7] are tweakValues for the next 8 blocks + c0 = _mm_xor_si128( tweakBuffer[0].m128i, firstRoundKey ); + c1 = _mm_xor_si128( tweakBuffer[1].m128i, firstRoundKey ); + c2 = 
_mm_xor_si128( tweakBuffer[2].m128i, firstRoundKey ); + c3 = _mm_xor_si128( tweakBuffer[3].m128i, firstRoundKey ); + c4 = _mm_xor_si128( tweakBuffer[4].m128i, firstRoundKey ); + c5 = _mm_xor_si128( tweakBuffer[5].m128i, firstRoundKey ); + c6 = _mm_xor_si128( tweakBuffer[6].m128i, firstRoundKey ); + c7 = _mm_xor_si128( tweakBuffer[7].m128i, firstRoundKey ); + + c0 = _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ); + c1 = _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16) ) ); + c2 = _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32) ) ); + c3 = _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48) ) ); + c4 = _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64) ) ); + c5 = _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80) ) ); + c6 = _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96) ) ); + c7 = _mm_xor_si128( c7, _mm_loadu_si128( ( __m128i * ) (pbSrc + 112) ) ); + + keyPtr = pExpandedKey->lastEncRoundKey + 1; + + // Do 8 full rounds (AES-128|AES-192|AES-256) with stitched XTS (performed in scalar registers) + for( aesDecryptXtsLoop = 0; aesDecryptXtsLoop < 8; aesDecryptXtsLoop++ ) + { + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); + keyPtr ++; + c0 = _mm_aesdec_si128( c0, roundkey ); + c1 = _mm_aesdec_si128( c1, roundkey ); + c2 = _mm_aesdec_si128( c2, roundkey ); + c3 = _mm_aesdec_si128( c3, roundkey ); + c4 = _mm_aesdec_si128( c4, roundkey ); + c5 = _mm_aesdec_si128( c5, roundkey ); + c6 = _mm_aesdec_si128( c6, roundkey ); + c7 = _mm_aesdec_si128( c7, roundkey ); + + // Prepare tweakBuffer[8-15] with tweak^lastRoundKey + tweakBuffer[ 8+aesDecryptXtsLoop ].m128i = _mm_xor_si128( tweakBuffer[ aesDecryptXtsLoop ].m128i, lastRoundKey ); + // Prepare tweakBuffer[0-7] with tweaks for next 8 blocks + XTS_MUL_ALPHA_Scalar( lastTweakLow, lastTweakHigh ); + tweakBuffer[ aesDecryptXtsLoop ].ull[0] = lastTweakLow; + tweakBuffer[ aesDecryptXtsLoop ].ull[1] = lastTweakHigh; + } + + do + 
{ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); + keyPtr ++; + c0 = _mm_aesdec_si128( c0, roundkey ); + c1 = _mm_aesdec_si128( c1, roundkey ); + c2 = _mm_aesdec_si128( c2, roundkey ); + c3 = _mm_aesdec_si128( c3, roundkey ); + c4 = _mm_aesdec_si128( c4, roundkey ); + c5 = _mm_aesdec_si128( c5, roundkey ); + c6 = _mm_aesdec_si128( c6, roundkey ); + c7 = _mm_aesdec_si128( c7, roundkey ); + } while( keyPtr < keyLimit ); + + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_aesdeclast_si128( c0, tweakBuffer[ 8].m128i ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_aesdeclast_si128( c1, tweakBuffer[ 9].m128i ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_aesdeclast_si128( c2, tweakBuffer[10].m128i ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_aesdeclast_si128( c3, tweakBuffer[11].m128i ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_aesdeclast_si128( c4, tweakBuffer[12].m128i ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_aesdeclast_si128( c5, tweakBuffer[13].m128i ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_aesdeclast_si128( c6, tweakBuffer[14].m128i ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 112), _mm_aesdeclast_si128( c7, tweakBuffer[15].m128i ) ); + + pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE; + pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE; + cbDataMain -= 8 * SYMCRYPT_AES_BLOCK_SIZE; + } + + if( cbDataTail == 0 ) + { + return; // <-- expected case; early return here + } + + // Rare case, with data unit length not being multiple of 128 bytes, handle the tail one block at a time + t0 = tweakBuffer[0].m128i; + + while( cbDataTail >= 2*SYMCRYPT_AES_BLOCK_SIZE ) + { + c0 = _mm_xor_si128( _mm_loadu_si128( ( __m128i * ) pbSrc ), t0 ); + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + AES_DECRYPT_1( pExpandedKey, c0 ); + _mm_storeu_si128( (__m128i *) pbDst, _mm_xor_si128( c0, t0 ) ); + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + c7 = t0; + XTS_MUL_ALPHA( t0, t0 ); + cbDataTail -= SYMCRYPT_AES_BLOCK_SIZE; + } + + if( cbDataTail > 
SYMCRYPT_AES_BLOCK_SIZE ) + { + // Ciphertext stealing decryption + // + // +--------------+ + // | | + // | V + // +-----------------+ | +-----+-----------+ + // | C_m-1 | | | C_m |++++CP+++++| + // +-----------------+ | +-----+-----------+ + // | | | + // dec_m | dec_m-1 + // | | | + // V | V + // +-----+-----------+ | +-----------------+ + // | P_m |++++CP+++++|--+ | P_m-1 | + // +-----+-----------+ +-----------------+ + // | / + // +---------------- / --+ + // / | + // | V + // +-----------------+ | +-----+ + // | P_m-1 |<-+ | P_m | + // +-----------------+ +-----+ + + // Do final tweak update into c1 + // Penultimate tweak is in t0, ready for final decryption + XTS_MUL_ALPHA( t0, c1 ); + + // Decrypt penultimate ciphertext block into tweakBuffer[0] + c0 = _mm_xor_si128( _mm_loadu_si128( (__m128i *) pbSrc ), c1 ); + AES_DECRYPT_1( pExpandedKey, c0 ); + tweakBuffer[0].m128i = _mm_xor_si128( c0, c1 ); + + cbDataTail -= SYMCRYPT_AES_BLOCK_SIZE; + + // Copy tweakBuffer[0] to tweakBuffer[1] + tweakBuffer[1].m128i = tweakBuffer[0].m128i; + // Copy final ciphertext bytes to prefix of tweakBuffer[0] - we must read before writing to support in-place decryption + memcpy( &tweakBuffer[0].ul[0], pbSrc + SYMCRYPT_AES_BLOCK_SIZE, cbDataTail ); + // Copy prefix of tweakBuffer[1] to the right place in the destination buffer + memcpy( pbDst + SYMCRYPT_AES_BLOCK_SIZE, &tweakBuffer[1].ul[0], cbDataTail ); + + // Load updated tweakBuffer[0] into c0 + c0 = tweakBuffer[0].m128i; + } else { + // Just load final ciphertext block into c0 + c0 = _mm_loadu_si128( (__m128i*) pbSrc ); + } + + // Final full block decryption + c0 = _mm_xor_si128( c0, t0 ); + AES_DECRYPT_1( pExpandedKey, c0 ); + _mm_storeu_si128( (__m128i *) pbDst, _mm_xor_si128( c0, t0 ) ); +} + +#define AES_FULLROUND_4_GHASH_1( roundkey, keyPtr, c0, c1, c2, c3, r0, t0, t1, gHashPointer, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ) \ +{ \ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; 
\ + c0 = _mm_aesenc_si128( c0, roundkey ); \ + c1 = _mm_aesenc_si128( c1, roundkey ); \ + c2 = _mm_aesenc_si128( c2, roundkey ); \ + c3 = _mm_aesenc_si128( c3, roundkey ); /* one AES round on all four blocks with the same round key */ \ +\ + r0 = _mm_loadu_si128( (__m128i *) gHashPointer ); \ + r0 = _mm_shuffle_epi8( r0, byteReverseOrder ); \ + gHashPointer += 16; /* consume one 16-byte GHASH input block */ \ +\ + t1 = _mm_loadu_si128( (__m128i *) &GHASH_H_POWER(gHashExpandedKeyTable, todo) ); \ + t0 = _mm_clmulepi64_si128( r0, t1, 0x00 ); /* carry-less product of the low 64-bit halves */ \ + t1 = _mm_clmulepi64_si128( r0, t1, 0x11 ); /* carry-less product of the high 64-bit halves */ \ +\ + resl = _mm_xor_si128( resl, t0 ); \ + resh = _mm_xor_si128( resh, t1 ); \ +\ + t0 = _mm_srli_si128( r0, 8 ); \ + r0 = _mm_xor_si128( r0, t0 ); /* low 64 bits of r0 = high half XOR low half of the block */ \ + t1 = _mm_loadu_si128( (__m128i *) &GHASH_Hx_POWER(gHashExpandedKeyTable, todo) ); \ + t1 = _mm_clmulepi64_si128( r0, t1, 0x00 ); \ +\ + resm = _mm_xor_si128( resm, t1 ); /* accumulate the folded-halves product with the Hx table entry */ \ + todo --; /* the next block uses table index todo-1 */ \ +}; + +#define AES_GCM_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3, gHashPointer, ghashRounds, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ) /* AES-encrypts c0..c3 in place; each of the first ghashRounds rounds also folds one 16-byte block from gHashPointer into resl, resm and resh */ \ +{ \ + const BYTE (*keyPtr)[4][4]; \ + const BYTE (*keyLimit)[4][4]; \ + __m128i roundkey; \ + __m128i t0, t1; \ + __m128i r0; \ + SIZE_T aesEncryptGhashLoop; \ +\ + keyPtr = &pExpandedKey->RoundKey[0]; \ + keyLimit = pExpandedKey->lastEncRoundKey; \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = _mm_xor_si128( c0, roundkey ); \ + c1 = _mm_xor_si128( c1, roundkey ); \ + c2 = _mm_xor_si128( c2, roundkey ); \ + c3 = _mm_xor_si128( c3, roundkey ); /* initial round-key XOR with RoundKey[0] */ \ +\ + /* Do ghashRounds full rounds (AES-128|AES-192|AES-256) with stitched GHASH */ \ + for( aesEncryptGhashLoop = 0; aesEncryptGhashLoop < ghashRounds; aesEncryptGhashLoop++ ) \ + { \ + AES_FULLROUND_4_GHASH_1( roundkey, keyPtr, c0, c1, c2, c3, r0, t0, t1, gHashPointer, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ); \ + } \ +\ + do /* do-while: at least one round without GHASH always runs after the stitched rounds */ \ + { \ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = _mm_aesenc_si128( c0, roundkey ); \ + c1 = _mm_aesenc_si128( c1, roundkey ); \ + c2 =
_mm_aesenc_si128( c2, roundkey ); \ + c3 = _mm_aesenc_si128( c3, roundkey ); \ + } while( keyPtr < keyLimit ); \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); /* round key consumed by the final aesenclast round */ \ +\ + c0 = _mm_aesenclast_si128( c0, roundkey ); \ + c1 = _mm_aesenclast_si128( c1, roundkey ); \ + c2 = _mm_aesenclast_si128( c2, roundkey ); \ + c3 = _mm_aesenclast_si128( c3, roundkey ); \ +}; + +#define AES_FULLROUND_8_GHASH_1( roundkey, keyPtr, c0, c1, c2, c3, c4, c5, c6, c7, r0, t0, t1, gHashPointer, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ) /* one AES round on c0..c7 plus accumulation of one 16-byte GHASH block; advances keyPtr and gHashPointer and decrements todo */ \ +{ \ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = _mm_aesenc_si128( c0, roundkey ); \ + c1 = _mm_aesenc_si128( c1, roundkey ); \ + c2 = _mm_aesenc_si128( c2, roundkey ); \ + c3 = _mm_aesenc_si128( c3, roundkey ); \ + c4 = _mm_aesenc_si128( c4, roundkey ); \ + c5 = _mm_aesenc_si128( c5, roundkey ); \ + c6 = _mm_aesenc_si128( c6, roundkey ); \ + c7 = _mm_aesenc_si128( c7, roundkey ); \ +\ + r0 = _mm_loadu_si128( (__m128i *) gHashPointer ); \ + r0 = _mm_shuffle_epi8( r0, byteReverseOrder ); \ + gHashPointer += 16; \ +\ + t1 = _mm_loadu_si128( (__m128i *) &GHASH_H_POWER(gHashExpandedKeyTable, todo) ); \ + t0 = _mm_clmulepi64_si128( r0, t1, 0x00 ); /* low halves */ \ + t1 = _mm_clmulepi64_si128( r0, t1, 0x11 ); /* high halves */ \ +\ + resl = _mm_xor_si128( resl, t0 ); \ + resh = _mm_xor_si128( resh, t1 ); \ +\ + t0 = _mm_srli_si128( r0, 8 ); \ + r0 = _mm_xor_si128( r0, t0 ); /* low 64 bits of r0 = high half XOR low half of the block */ \ + t1 = _mm_loadu_si128( (__m128i *) &GHASH_Hx_POWER(gHashExpandedKeyTable, todo) ); \ + t1 = _mm_clmulepi64_si128( r0, t1, 0x00 ); \ +\ + resm = _mm_xor_si128( resm, t1 ); \ + todo --; \ +}; + +#define AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, gHashPointer, ghashRounds, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ) /* AES-encrypts c0..c7 in place; each of the first ghashRounds rounds also folds one 16-byte block from gHashPointer into resl, resm and resh */ \ +{ \ + const BYTE (*keyPtr)[4][4]; \ + const BYTE (*keyLimit)[4][4]; \ + __m128i roundkey; \ + __m128i t0, t1; \ + __m128i r0; \ + SIZE_T aesEncryptGhashLoop; \ +\ + keyPtr = &pExpandedKey->RoundKey[0]; \ +
keyLimit = pExpandedKey->lastEncRoundKey; \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = _mm_xor_si128( c0, roundkey ); \ + c1 = _mm_xor_si128( c1, roundkey ); \ + c2 = _mm_xor_si128( c2, roundkey ); \ + c3 = _mm_xor_si128( c3, roundkey ); \ + c4 = _mm_xor_si128( c4, roundkey ); \ + c5 = _mm_xor_si128( c5, roundkey ); \ + c6 = _mm_xor_si128( c6, roundkey ); \ + c7 = _mm_xor_si128( c7, roundkey ); /* initial round-key XOR with RoundKey[0] */ \ +\ + /* Do ghashRounds full rounds (AES-128|AES-192|AES-256) with stitched GHASH */ \ + for( aesEncryptGhashLoop = 0; aesEncryptGhashLoop < ghashRounds; aesEncryptGhashLoop++ ) \ + { \ + AES_FULLROUND_8_GHASH_1( roundkey, keyPtr, c0, c1, c2, c3, c4, c5, c6, c7, r0, t0, t1, gHashPointer, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ); \ + } \ +\ + do /* do-while: at least one round without GHASH always runs after the stitched rounds */ \ + { \ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); \ + keyPtr ++; \ + c0 = _mm_aesenc_si128( c0, roundkey ); \ + c1 = _mm_aesenc_si128( c1, roundkey ); \ + c2 = _mm_aesenc_si128( c2, roundkey ); \ + c3 = _mm_aesenc_si128( c3, roundkey ); \ + c4 = _mm_aesenc_si128( c4, roundkey ); \ + c5 = _mm_aesenc_si128( c5, roundkey ); \ + c6 = _mm_aesenc_si128( c6, roundkey ); \ + c7 = _mm_aesenc_si128( c7, roundkey ); \ + } while( keyPtr < keyLimit ); \ +\ + roundkey = _mm_loadu_si128( (__m128i *) keyPtr ); /* round key consumed by the final aesenclast round */ \ +\ + c0 = _mm_aesenclast_si128( c0, roundkey ); \ + c1 = _mm_aesenclast_si128( c1, roundkey ); \ + c2 = _mm_aesenclast_si128( c2, roundkey ); \ + c3 = _mm_aesenclast_si128( c3, roundkey ); \ + c4 = _mm_aesenclast_si128( c4, roundkey ); \ + c5 = _mm_aesenclast_si128( c5, roundkey ); \ + c6 = _mm_aesenclast_si128( c6, roundkey ); \ + c7 = _mm_aesenclast_si128( c7, roundkey ); \ +}; + +// This call is functionally identical to: +// SymCryptAesCtrMsb64Xmm( pExpandedKey, +// pbChainingValue, +// pbSrc, +// pbDst, +// cbData ); +// SymCryptGHashAppendDataPclmulqdq( expandedKeyTable, +// pState, +// pbDst, +// cbData ); +VOID +SYMCRYPT_CALL +SymCryptAesGcmEncryptStitchedXmm( +
_In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ /* Encrypts cbData bytes in counter mode and folds the resulting ciphertext into the GHASH state; GHASH of already-written ciphertext is interleaved with the AES rounds for the following blocks. Updates pbChainingValue and pState on return. */ + __m128i chain = _mm_loadu_si128( (__m128i *) pbChainingValue ); + + __m128i BYTE_REVERSE_ORDER = _mm_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ); + __m128i vMultiplicationConstant = _mm_set_epi32( 0, 0, 0xc2000000, 0 ); + + __m128i chainIncrement1 = _mm_set_epi32( 0, 0, 0, 1 ); + __m128i chainIncrement2 = _mm_set_epi32( 0, 0, 0, 2 ); + __m128i chainIncrement8 = _mm_set_epi32( 0, 0, 0, 8 ); + + __m128i c0, c1, c2, c3, c4, c5, c6, c7; + __m128i r0, r1; + + __m128i state; + __m128i a0, a1, a2; + SIZE_T nBlocks = cbData / SYMCRYPT_GF128_BLOCK_SIZE; + SIZE_T todo; + PCBYTE pbGhashSrc = pbDst; /* GHASH input is the ciphertext written to pbDst */ + + SYMCRYPT_ASSERT( (cbData & SYMCRYPT_GCM_BLOCK_MOD_MASK) == 0 ); // cbData is multiple of block size + + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER ); /* byte-reverse so the 32-bit lane adds below act on the last four bytes of the stored counter block */ + state = _mm_loadu_si128( (__m128i *) pState ); + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ); /* blocks to fold before the next reduction, capped at SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS */ + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); /* presumably seeds a0..a2 from state and table entry todo; CLMUL_3 is defined outside this view */ + + // Do 8 blocks of CTR either for tail (if total blocks <8) or for encryption of first 8 blocks + c0 = chain; + c1 = _mm_add_epi32( chain, chainIncrement1 ); + c2 = _mm_add_epi32( chain, chainIncrement2 ); + c3 = _mm_add_epi32( c1, chainIncrement2 ); + c4 = _mm_add_epi32( c2, chainIncrement2 ); + c5 = _mm_add_epi32( c3, chainIncrement2 ); + c6 = _mm_add_epi32( c4, chainIncrement2 ); + c7 = _mm_add_epi32( c5, chainIncrement2 ); /* c0..c7 = chain+0 .. chain+7; 32-bit lane adds, so no carry leaves the low lane */ + + c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER ); + c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER ); + c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER ); + c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER ); + c4 =
_mm_shuffle_epi8( c4, BYTE_REVERSE_ORDER ); + c5 = _mm_shuffle_epi8( c5, BYTE_REVERSE_ORDER ); + c6 = _mm_shuffle_epi8( c6, BYTE_REVERSE_ORDER ); + c7 = _mm_shuffle_epi8( c7, BYTE_REVERSE_ORDER ); + + AES_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); /* c0..c7 are XORed with the input below, i.e. they serve as key stream blocks */ + + if( nBlocks >= 8 ) + { + // Encrypt first 8 blocks - update chain + chain = _mm_add_epi32( chain, chainIncrement8 ); + + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst +112), _mm_xor_si128( c7, _mm_loadu_si128( ( __m128i * ) (pbSrc +112) ) ) ); + + pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE; + pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE; /* nBlocks is deliberately not reduced here: it still counts the 8 blocks just written, which are not yet hashed */ + + while( nBlocks >= 16 ) + { + // In this loop we always have 8 blocks to encrypt and we have already encrypted the previous 8 blocks ready for GHASH + c0 = chain; + c1 = _mm_add_epi32( chain, chainIncrement1 ); + c2 = _mm_add_epi32( chain, chainIncrement2 ); + c3 = _mm_add_epi32( c1, chainIncrement2 ); + c4 = _mm_add_epi32( c2, chainIncrement2 ); + c5 = _mm_add_epi32( c3, chainIncrement2 ); + c6 = _mm_add_epi32( c4, chainIncrement2 ); + c7 = _mm_add_epi32( c5, chainIncrement2 ); + chain = _mm_add_epi32( c6, chainIncrement2 ); /* c6 is chain+6, so chain advances by 8 */ + + c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER ); + c1 =
_mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER ); + c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER ); + c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER ); + c4 = _mm_shuffle_epi8( c4, BYTE_REVERSE_ORDER ); + c5 = _mm_shuffle_epi8( c5, BYTE_REVERSE_ORDER ); + c6 = _mm_shuffle_epi8( c6, BYTE_REVERSE_ORDER ); + c7 = _mm_shuffle_epi8( c7, BYTE_REVERSE_ORDER ); + + AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pbGhashSrc, 8, BYTE_REVERSE_ORDER, expandedKeyTable, todo, a0, a1, a2 ); /* builds 8 key stream blocks while hashing the 8 previously written ciphertext blocks at pbGhashSrc */ + + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst +112), _mm_xor_si128( c7, _mm_loadu_si128( ( __m128i * ) (pbSrc +112) ) ) ); + + pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE; + pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE; + nBlocks -= 8; + + if( todo == 0 ) /* current batch fully accumulated */ + { + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); /* presumably reduces a0..a2 into state; macro defined outside this view */ + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); /* start the next batch from the updated state */ + } + } + + // We now have at least 8 blocks of encrypted data to GHASH and at most 7 blocks left to encrypt + // Do 8 blocks of GHASH in parallel with generating 0, 4, or 8 AES-CTR
blocks for tail encryption + nBlocks -= 8; /* the loop above exits with 8 <= nBlocks < 16, so this leaves the 0..7 blocks still to encrypt */ + if (nBlocks > 0) + { + c0 = chain; + c1 = _mm_add_epi32( chain, chainIncrement1 ); + c2 = _mm_add_epi32( chain, chainIncrement2 ); + c3 = _mm_add_epi32( c1, chainIncrement2 ); + c4 = _mm_add_epi32( c2, chainIncrement2 ); + + c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER ); + c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER ); + c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER ); + c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER ); + + if (nBlocks > 4) + { + // Do 8 rounds of AES-CTR for tail in parallel with 8 rounds of GHASH + c5 = _mm_add_epi32( c4, chainIncrement1 ); + c6 = _mm_add_epi32( c4, chainIncrement2 ); + + c4 = _mm_shuffle_epi8( c4, BYTE_REVERSE_ORDER ); + c5 = _mm_shuffle_epi8( c5, BYTE_REVERSE_ORDER ); + c6 = _mm_shuffle_epi8( c6, BYTE_REVERSE_ORDER ); + + AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pbGhashSrc, 8, BYTE_REVERSE_ORDER, expandedKeyTable, todo, a0, a1, a2 ); /* c7 is not refreshed on this path; the tail loops below consume c0..c6 only */ + } + else + { + // Do 4 rounds of AES-CTR for tail in parallel with 8 rounds of GHASH + AES_GCM_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3, pbGhashSrc, 8, BYTE_REVERSE_ORDER, expandedKeyTable, todo, a0, a1, a2 ); + } + + if( todo == 0) + { + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + } + } + else + { + // Just do the final 8 rounds of GHASH + for( todo=8; todo>0; todo-- ) /* nBlocks == 0 here: only the last 8 written ciphertext blocks remain to be hashed */ + { + r0 = _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *) (pbGhashSrc + 0) ), BYTE_REVERSE_ORDER ); + pbGhashSrc += SYMCRYPT_AES_BLOCK_SIZE; + + CLMUL_ACC_3( r0, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + } + + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + } + } + + if( nBlocks > 0 ) + { + // Encrypt 1-7 blocks with pre-generated AES-CTR blocks
and GHASH the results + while( nBlocks >= 2 ) + { + chain = _mm_add_epi32( chain, chainIncrement2 ); + + r0 = _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ); + r1 = _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16) ) ); + + _mm_storeu_si128( (__m128i *) (pbDst + 0), r0 ); + _mm_storeu_si128( (__m128i *) (pbDst + 16), r1 ); + + r0 = _mm_shuffle_epi8( r0, BYTE_REVERSE_ORDER ); + r1 = _mm_shuffle_epi8( r1, BYTE_REVERSE_ORDER ); + + CLMUL_ACC_3( r0, GHASH_H_POWER(expandedKeyTable, todo - 0), GHASH_Hx_POWER(expandedKeyTable, todo - 0), a0, a1, a2 ); + CLMUL_ACC_3( r1, GHASH_H_POWER(expandedKeyTable, todo - 1), GHASH_Hx_POWER(expandedKeyTable, todo - 1), a0, a1, a2 ); + + pbDst += 2*SYMCRYPT_AES_BLOCK_SIZE; + pbSrc += 2*SYMCRYPT_AES_BLOCK_SIZE; + todo -= 2; + nBlocks -= 2; + c0 = c2; + c1 = c3; + c2 = c4; + c3 = c5; + c4 = c6; /* slide the remaining pre-generated key stream blocks down by two */ + } + + if( nBlocks > 0 ) + { + chain = _mm_add_epi32( chain, chainIncrement1 ); + + r0 = _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ); + + _mm_storeu_si128( (__m128i *) (pbDst + 0), r0 ); + + r0 = _mm_shuffle_epi8( r0, BYTE_REVERSE_ORDER ); + + CLMUL_ACC_3( r0, GHASH_H_POWER(expandedKeyTable, 1), GHASH_Hx_POWER(expandedKeyTable, 1), a0, a1, a2 ); /* last block of the batch always uses table index 1 */ + } + + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + } + + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER ); /* restore the stored byte order before writing the advanced counter back */ + _mm_storeu_si128( (__m128i *) pbChainingValue, chain ); + _mm_storeu_si128( (__m128i *) pState, state ); +} + +#pragma warning(push) +#pragma warning( disable:4701 ) +#pragma runtime_checks( "u", off ) +// This call is functionally identical to: +// SymCryptGHashAppendDataPclmulqdq( expandedKeyTable, +// pState, +// pbSrc, +// cbData ); +// SymCryptAesCtrMsb64Xmm( pExpandedKey, +// pbChainingValue, +// pbSrc, +// pbDst, +// cbData ); +VOID +SYMCRYPT_CALL +SymCryptAesGcmDecryptStitchedXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE
pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ /* Folds the ciphertext at pbSrc into the GHASH state and decrypts it in counter mode; each group of input blocks is hashed inside the stitched macro before the matching output is stored. Updates pbChainingValue and pState on return. */ + __m128i chain = _mm_loadu_si128( (__m128i *) pbChainingValue ); + + __m128i BYTE_REVERSE_ORDER = _mm_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ); + __m128i vMultiplicationConstant = _mm_set_epi32( 0, 0, 0xc2000000, 0 ); + + __m128i chainIncrement1 = _mm_set_epi32( 0, 0, 0, 1 ); + __m128i chainIncrement2 = _mm_set_epi32( 0, 0, 0, 2 ); + + __m128i c0, c1, c2, c3, c4, c5, c6, c7; + + __m128i state; + __m128i a0, a1, a2; + SIZE_T nBlocks = cbData / SYMCRYPT_GF128_BLOCK_SIZE; + SIZE_T todo = 0; + PCBYTE pbGhashSrc = pbSrc; /* GHASH input is the ciphertext being decrypted */ + + SYMCRYPT_ASSERT( (cbData & SYMCRYPT_GCM_BLOCK_MOD_MASK) == 0 ); // cbData is multiple of block size + + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER ); /* byte-reverse so the 32-bit lane adds below act on the last four bytes of the stored counter block */ + state = _mm_loadu_si128( (__m128i *) pState ); + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ); /* blocks to fold before the next reduction */ + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + + while( nBlocks >= 8 ) + { + // In this loop we always have 8 blocks to decrypt and GHASH + c0 = chain; + c1 = _mm_add_epi32( chain, chainIncrement1 ); + c2 = _mm_add_epi32( chain, chainIncrement2 ); + c3 = _mm_add_epi32( c1, chainIncrement2 ); + c4 = _mm_add_epi32( c2, chainIncrement2 ); + c5 = _mm_add_epi32( c3, chainIncrement2 ); + c6 = _mm_add_epi32( c4, chainIncrement2 ); + c7 = _mm_add_epi32( c5, chainIncrement2 ); + chain = _mm_add_epi32( c6, chainIncrement2 ); /* c6 is chain+6, so chain advances by 8 */ + + c0 = _mm_shuffle_epi8( c0, BYTE_REVERSE_ORDER ); + c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER ); + c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER ); + c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER ); + c4 = _mm_shuffle_epi8( c4, BYTE_REVERSE_ORDER ); + c5 = _mm_shuffle_epi8( c5, BYTE_REVERSE_ORDER ); + c6 = _mm_shuffle_epi8( c6,
BYTE_REVERSE_ORDER ); + c7 = _mm_shuffle_epi8( c7, BYTE_REVERSE_ORDER ); + + AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pbGhashSrc, 8, BYTE_REVERSE_ORDER, expandedKeyTable, todo, a0, a1, a2 ); /* hashes the same 8 input blocks that are decrypted just below */ + + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 32), _mm_xor_si128( c2, _mm_loadu_si128( ( __m128i * ) (pbSrc + 32) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 48), _mm_xor_si128( c3, _mm_loadu_si128( ( __m128i * ) (pbSrc + 48) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 64), _mm_xor_si128( c4, _mm_loadu_si128( ( __m128i * ) (pbSrc + 64) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 80), _mm_xor_si128( c5, _mm_loadu_si128( ( __m128i * ) (pbSrc + 80) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 96), _mm_xor_si128( c6, _mm_loadu_si128( ( __m128i * ) (pbSrc + 96) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst +112), _mm_xor_si128( c7, _mm_loadu_si128( ( __m128i * ) (pbSrc +112) ) ) ); + + pbDst += 8 * SYMCRYPT_AES_BLOCK_SIZE; + pbSrc += 8 * SYMCRYPT_AES_BLOCK_SIZE; + nBlocks -= 8; + + if ( todo == 0 ) /* current batch fully accumulated */ + { + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + + if ( nBlocks > 0 ) /* only start another batch when input remains */ + { + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + } + } + } + + if( nBlocks > 0 ) + { + // We have 1-7 blocks to GHASH and decrypt + // Do the exact number of GHASH blocks we need in parallel with generating either 4 or 8 blocks of AES-CTR + c0 = chain; + c1 = _mm_add_epi32( chain, chainIncrement1 ); + c2 = _mm_add_epi32( chain, chainIncrement2 ); + c3 = _mm_add_epi32( c1, chainIncrement2 ); + c4 = _mm_add_epi32( c2, chainIncrement2 ); + + c0 = _mm_shuffle_epi8( c0,
BYTE_REVERSE_ORDER ); + c1 = _mm_shuffle_epi8( c1, BYTE_REVERSE_ORDER ); + c2 = _mm_shuffle_epi8( c2, BYTE_REVERSE_ORDER ); + c3 = _mm_shuffle_epi8( c3, BYTE_REVERSE_ORDER ); + + if( nBlocks > 4 ) + { + c5 = _mm_add_epi32( c4, chainIncrement1 ); + c6 = _mm_add_epi32( c4, chainIncrement2 ); + + c4 = _mm_shuffle_epi8( c4, BYTE_REVERSE_ORDER ); + c5 = _mm_shuffle_epi8( c5, BYTE_REVERSE_ORDER ); + c6 = _mm_shuffle_epi8( c6, BYTE_REVERSE_ORDER ); + + AES_GCM_ENCRYPT_8( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pbGhashSrc, nBlocks, BYTE_REVERSE_ORDER, expandedKeyTable, todo, a0, a1, a2 ); /* ghashRounds is nBlocks (5..7); c7 is not set on this path and is uninitialised if the loop above never ran, presumably why warning 4701 is disabled around this function; its result is not consumed below */ + } else { + AES_GCM_ENCRYPT_4( pExpandedKey, c0, c1, c2, c3, pbGhashSrc, nBlocks, BYTE_REVERSE_ORDER, expandedKeyTable, todo, a0, a1, a2 ); /* ghashRounds is nBlocks (1..4) */ + } + + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + + // Decrypt 1-7 blocks with pre-generated AES-CTR blocks + while( nBlocks >= 2 ) + { + chain = _mm_add_epi32( chain, chainIncrement2 ); + + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ) ); + _mm_storeu_si128( (__m128i *) (pbDst + 16), _mm_xor_si128( c1, _mm_loadu_si128( ( __m128i * ) (pbSrc + 16) ) ) ); + + pbDst += 2*SYMCRYPT_AES_BLOCK_SIZE; + pbSrc += 2*SYMCRYPT_AES_BLOCK_SIZE; + nBlocks -= 2; + c0 = c2; + c1 = c3; + c2 = c4; + c3 = c5; + c4 = c6; /* slide the remaining pre-generated key stream blocks down by two */ + } + + if( nBlocks > 0 ) + { + chain = _mm_add_epi32( chain, chainIncrement1 ); + + _mm_storeu_si128( (__m128i *) (pbDst + 0), _mm_xor_si128( c0, _mm_loadu_si128( ( __m128i * ) (pbSrc + 0) ) ) ); + } + } + + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER ); /* restore the stored byte order before writing the advanced counter back */ + _mm_storeu_si128( (__m128i *) pbChainingValue, chain ); + _mm_storeu_si128((__m128i *)pState, state ); +} +#pragma runtime_checks( "u", restore ) +#pragma warning(pop) + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // CPU_X86 | CPU_AMD64 diff --git a/libs/symcrypt/lib/aes-ymm.c b/libs/symcrypt/lib/aes-ymm.c new file mode 100644
index 00000000000..aa2f473e424 --- /dev/null +++ b/libs/symcrypt/lib/aes-ymm.c @@ -0,0 +1,793 @@ +// +// aes-ymm.c code for AES implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// All YMM code for AES operations +// Requires compiler support for aesni, pclmulqdq, avx2, vaes and vpclmulqdq +// + +#include "precomp.h" + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("avx2,vaes,vpclmulqdq"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("avx2,vaes,vpclmulqdq") +#endif + +#include "xtsaes_definitions.h" +#include "ghash_definitions.h" + +#define AES_ENCRYPT_YMM_2048( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ) /* AES-encrypts c0..c7 in place; each 256-bit register carries two 128-bit blocks, so 16 blocks (2048 bits) per invocation */ \ +{ \ + const BYTE (*keyPtr)[4][4]; \ + const BYTE (*keyLimit)[4][4]; \ + __m256i roundkeys; \ +\ + keyPtr = pExpandedKey->RoundKey; \ + keyLimit = pExpandedKey->lastEncRoundKey; \ +\ + /* _mm256_broadcastsi128_si256 requires AVX2 */ \ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); /* same 128-bit round key in both lanes */ \ + keyPtr ++; \ +\ + /* _mm256_xor_si256 requires AVX2 */ \ + c0 = _mm256_xor_si256( c0, roundkeys ); \ + c1 = _mm256_xor_si256( c1, roundkeys ); \ + c2 = _mm256_xor_si256( c2, roundkeys ); \ + c3 = _mm256_xor_si256( c3, roundkeys ); \ + c4 = _mm256_xor_si256( c4, roundkeys ); \ + c5 = _mm256_xor_si256( c5, roundkeys ); \ + c6 = _mm256_xor_si256( c6, roundkeys ); \ + c7 = _mm256_xor_si256( c7, roundkeys ); \ +\ + do /* full rounds until keyPtr reaches lastEncRoundKey */ \ + { \ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); \ + keyPtr ++; \ + c0 = _mm256_aesenc_epi128( c0, roundkeys ); \ + c1 = _mm256_aesenc_epi128( c1, roundkeys ); \ + c2 = _mm256_aesenc_epi128( c2, roundkeys ); \ + c3 = _mm256_aesenc_epi128( c3, roundkeys ); \ + c4 = _mm256_aesenc_epi128( c4, roundkeys ); \ + c5 = _mm256_aesenc_epi128( c5, roundkeys ); \ + c6 = _mm256_aesenc_epi128( c6, roundkeys ); \ + c7 = _mm256_aesenc_epi128( c7, roundkeys ); \ + } while( keyPtr 
< keyLimit ); \ +\ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); /* round key consumed by the final aesenclast round */ \ +\ + c0 = _mm256_aesenclast_epi128( c0, roundkeys ); \ + c1 = _mm256_aesenclast_epi128( c1, roundkeys ); \ + c2 = _mm256_aesenclast_epi128( c2, roundkeys ); \ + c3 = _mm256_aesenclast_epi128( c3, roundkeys ); \ + c4 = _mm256_aesenclast_epi128( c4, roundkeys ); \ + c5 = _mm256_aesenclast_epi128( c5, roundkeys ); \ + c6 = _mm256_aesenclast_epi128( c6, roundkeys ); \ + c7 = _mm256_aesenclast_epi128( c7, roundkeys ); \ +}; + +#define AES_DECRYPT_YMM_2048( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ) /* AES-decrypts c0..c7 in place; each 256-bit register carries two 128-bit blocks, so 16 blocks (2048 bits) per invocation */ \ +{ \ + const BYTE (*keyPtr)[4][4]; \ + const BYTE (*keyLimit)[4][4]; \ + __m256i roundkeys; \ +\ + keyPtr = pExpandedKey->lastEncRoundKey; \ + keyLimit = pExpandedKey->lastDecRoundKey; /* decryption walks the round keys upward from lastEncRoundKey to lastDecRoundKey */ \ +\ + /* _mm256_broadcastsi128_si256 requires AVX2 */ \ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); \ + keyPtr ++; \ +\ + /* _mm256_xor_si256 requires AVX2 */ \ + c0 = _mm256_xor_si256( c0, roundkeys ); \ + c1 = _mm256_xor_si256( c1, roundkeys ); \ + c2 = _mm256_xor_si256( c2, roundkeys ); \ + c3 = _mm256_xor_si256( c3, roundkeys ); \ + c4 = _mm256_xor_si256( c4, roundkeys ); \ + c5 = _mm256_xor_si256( c5, roundkeys ); \ + c6 = _mm256_xor_si256( c6, roundkeys ); \ + c7 = _mm256_xor_si256( c7, roundkeys ); \ +\ + do /* full rounds until keyPtr reaches lastDecRoundKey */ \ + { \ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); \ + keyPtr ++; \ + c0 = _mm256_aesdec_epi128( c0, roundkeys ); \ + c1 = _mm256_aesdec_epi128( c1, roundkeys ); \ + c2 = _mm256_aesdec_epi128( c2, roundkeys ); \ + c3 = _mm256_aesdec_epi128( c3, roundkeys ); \ + c4 = _mm256_aesdec_epi128( c4, roundkeys ); \ + c5 = _mm256_aesdec_epi128( c5, roundkeys ); \ + c6 = _mm256_aesdec_epi128( c6, roundkeys ); \ + c7 = _mm256_aesdec_epi128( c7, roundkeys ); \ + } while( keyPtr < keyLimit ); \ +\ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); /* round key consumed by the final aesdeclast round */ \ +\ + c0 = _mm256_aesdeclast_epi128( c0, roundkeys ); \ + 
c1 = _mm256_aesdeclast_epi128( c1, roundkeys ); \ + c2 = _mm256_aesdeclast_epi128( c2, roundkeys ); \ + c3 = _mm256_aesdeclast_epi128( c3, roundkeys ); \ + c4 = _mm256_aesdeclast_epi128( c4, roundkeys ); \ + c5 = _mm256_aesdeclast_epi128( c5, roundkeys ); \ + c6 = _mm256_aesdeclast_epi128( c6, roundkeys ); \ + c7 = _mm256_aesdeclast_epi128( c7, roundkeys ); \ +}; + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptDataUnitYmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE pbScratch, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ /* XTS-encrypts one data unit: whole groups of 16 blocks are processed here in YMM registers and any remainder is delegated to SymCryptXtsAesEncryptDataUnitXmm; pbScratch is only forwarded to that routine */ + __m128i t0, t1, t2, t3, t4, t5, t6, t7; + __m256i c0, c1, c2, c3, c4, c5, c6, c7; + __m128i XTS_ALPHA_MASK; + __m256i XTS_ALPHA_MULTIPLIER_Ymm; + + // Load tweaks into big T + __m256i T0, T1, T2, T3, T4, T5, T6, T7; + + SIZE_T cbDataMain; // number of bytes to handle in the main loop + SIZE_T cbDataTail; // number of bytes to handle in the tail loop + + // To simplify logic and unusual size processing, we handle all + // data not a multiple of 16 blocks in the tail loop + cbDataTail = cbData & ((16*SYMCRYPT_AES_BLOCK_SIZE)-1); + // Additionally, so that ciphertext stealing logic does not rely on + // reading back from the destination buffer, when we have a non-zero + // tail, we ensure that we handle at least 1 whole block in the tail + cbDataTail += ((cbDataTail > 0) && (cbDataTail < SYMCRYPT_AES_BLOCK_SIZE)) ? 
(16*SYMCRYPT_AES_BLOCK_SIZE) : 0; + cbDataMain = cbData - cbDataTail; + + SYMCRYPT_ASSERT(cbDataMain <= cbData); + SYMCRYPT_ASSERT(cbDataTail <= cbData); + SYMCRYPT_ASSERT((cbDataMain & ((16*SYMCRYPT_AES_BLOCK_SIZE)-1)) == 0); + + if( cbDataMain == 0 ) /* no whole 16-block group for the main loop: hand everything to the XMM routine */ + { + SymCryptXtsAesEncryptDataUnitXmm( pExpandedKey, pbTweakBlock, pbScratch, pbSrc, pbDst, cbDataTail ); + return; + } + + t0 = _mm_loadu_si128( (__m128i *) pbTweakBlock ); + XTS_ALPHA_MASK = _mm_set_epi32( 1, 1, 1, 0x87 ); + XTS_ALPHA_MULTIPLIER_Ymm = _mm256_set_epi64x( 0, 0x87, 0, 0x87 ); + + // Do not stall. + XTS_MUL_ALPHA4( t0, t4 ); + XTS_MUL_ALPHA ( t0, t1 ); + XTS_MUL_ALPHA ( t4, t5 ); + XTS_MUL_ALPHA ( t1, t2 ); + XTS_MUL_ALPHA ( t5, t6 ); + XTS_MUL_ALPHA ( t2, t3 ); + XTS_MUL_ALPHA ( t6, t7 ); /* presumably t0..t7 now hold the tweaks for blocks 0..7; the XTS_MUL_ALPHA macros are defined outside this view */ + + T0 = _mm256_insertf128_si256( _mm256_castsi128_si256( t0 ), t1, 1 ); // AVX + T1 = _mm256_insertf128_si256( _mm256_castsi128_si256( t2 ), t3, 1 ); + T2 = _mm256_insertf128_si256( _mm256_castsi128_si256( t4 ), t5, 1 ); + T3 = _mm256_insertf128_si256( _mm256_castsi128_si256( t6 ), t7, 1 ); /* each T register packs two consecutive 128-bit tweaks, lower-numbered one in the low lane */ + XTS_MUL_ALPHA8_YMM(T0, T4); + XTS_MUL_ALPHA8_YMM(T1, T5); + XTS_MUL_ALPHA8_YMM(T2, T6); + XTS_MUL_ALPHA8_YMM(T3, T7); /* presumably T4..T7 are T0..T3 advanced by 8 blocks, giving the tweaks for blocks 8..15 */ + + for(;;) + { + c0 = _mm256_xor_si256( T0, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 0 ) ) ); + c1 = _mm256_xor_si256( T1, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 2*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c2 = _mm256_xor_si256( T2, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 4*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c3 = _mm256_xor_si256( T3, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 6*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c4 = _mm256_xor_si256( T4, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 8*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c5 = _mm256_xor_si256( T5, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 10*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c6 = _mm256_xor_si256( T6, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 12*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c7 = _mm256_xor_si256( T7, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 
14*SYMCRYPT_AES_BLOCK_SIZE ) ) ); /* input XOR tweak for all 16 blocks */ + + pbSrc += 16 * SYMCRYPT_AES_BLOCK_SIZE; + + AES_ENCRYPT_YMM_2048( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 0 ), _mm256_xor_si256( c0, T0 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 2*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c1, T1 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 4*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c2, T2 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 6*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c3, T3 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 8*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c4, T4 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 10*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c5, T5 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 12*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c6, T6 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 14*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c7, T7 ) ); /* cipher output XOR the same tweak */ + + pbDst += 16 * SYMCRYPT_AES_BLOCK_SIZE; + + cbDataMain -= 16 * SYMCRYPT_AES_BLOCK_SIZE; + if( cbDataMain < 16 * SYMCRYPT_AES_BLOCK_SIZE ) /* cbDataMain is a multiple of 16 blocks (asserted above), so this fires when it reaches zero */ + { + break; + } + + XTS_MUL_ALPHA16_YMM(T0, T0); + XTS_MUL_ALPHA16_YMM(T1, T1); + XTS_MUL_ALPHA16_YMM(T2, T2); + XTS_MUL_ALPHA16_YMM(T3, T3); + XTS_MUL_ALPHA16_YMM(T4, T4); + XTS_MUL_ALPHA16_YMM(T5, T5); + XTS_MUL_ALPHA16_YMM(T6, T6); + XTS_MUL_ALPHA16_YMM(T7, T7); /* presumably advances every tweak by 16 blocks for the next group */ + } + + // We won't do another 16-block set so we don't update the tweak blocks + + if( cbDataTail > 0 ) + { + // + // This is a rare case: the data unit length is not a multiple of 256 bytes. + // We do this in the Xmm implementation. 
+ // Fix up the tweak block first + // + t7 = _mm256_extracti128_si256 ( T7, 1 /* Highest 128 bits */ ); // AVX2 + _mm256_zeroupper(); + XTS_MUL_ALPHA( t7, t0 ); /* derive the tweak for the first tail block from the tweak of the last block processed above */ + _mm_storeu_si128( (__m128i *) pbTweakBlock, t0 ); /* pbTweakBlock is written back only on this path */ + + SymCryptXtsAesEncryptDataUnitXmm( pExpandedKey, pbTweakBlock, pbScratch, pbSrc, pbDst, cbDataTail ); + } + else { + _mm256_zeroupper(); /* clear the upper YMM halves before returning */ + } +} + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitYmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE pbScratch, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __m128i t0, t1, t2, t3, t4, t5, t6, t7; + __m256i c0, c1, c2, c3, c4, c5, c6, c7; + __m128i XTS_ALPHA_MASK; + __m256i XTS_ALPHA_MULTIPLIER_Ymm; + + // Load tweaks into big T + __m256i T0, T1, T2, T3, T4, T5, T6, T7; + + SIZE_T cbDataMain; // number of bytes to handle in the main loop + SIZE_T cbDataTail; // number of bytes to handle in the tail loop + + // To simplify logic and unusual size processing, we handle all + // data not a multiple of 16 blocks in the tail loop + cbDataTail = cbData & ((16*SYMCRYPT_AES_BLOCK_SIZE)-1); + // Additionally, so that ciphertext stealing logic does not rely on + // reading back from the destination buffer, when we have a non-zero + // tail, we ensure that we handle at least 1 whole block in the tail + cbDataTail += ((cbDataTail > 0) && (cbDataTail < SYMCRYPT_AES_BLOCK_SIZE)) ? 
(16*SYMCRYPT_AES_BLOCK_SIZE) : 0; + cbDataMain = cbData - cbDataTail; + + SYMCRYPT_ASSERT(cbDataMain <= cbData); + SYMCRYPT_ASSERT(cbDataTail <= cbData); + SYMCRYPT_ASSERT((cbDataMain & ((16*SYMCRYPT_AES_BLOCK_SIZE)-1)) == 0); + + if( cbDataMain == 0 ) + { + SymCryptXtsAesDecryptDataUnitXmm( pExpandedKey, pbTweakBlock, pbScratch, pbSrc, pbDst, cbDataTail ); + return; + } + + t0 = _mm_loadu_si128( (__m128i *) pbTweakBlock ); + XTS_ALPHA_MASK = _mm_set_epi32( 1, 1, 1, 0x87 ); + XTS_ALPHA_MULTIPLIER_Ymm = _mm256_set_epi64x( 0, 0x87, 0, 0x87 ); + + // Do not stall. + XTS_MUL_ALPHA4( t0, t4 ); + XTS_MUL_ALPHA ( t0, t1 ); + XTS_MUL_ALPHA ( t4, t5 ); + XTS_MUL_ALPHA ( t1, t2 ); + XTS_MUL_ALPHA ( t5, t6 ); + XTS_MUL_ALPHA ( t2, t3 ); + XTS_MUL_ALPHA ( t6, t7 ); + + T0 = _mm256_insertf128_si256( _mm256_castsi128_si256( t0 ), t1, 1); // AVX + T1 = _mm256_insertf128_si256( _mm256_castsi128_si256( t2 ), t3, 1); + T2 = _mm256_insertf128_si256( _mm256_castsi128_si256( t4 ), t5, 1); + T3 = _mm256_insertf128_si256( _mm256_castsi128_si256( t6 ), t7, 1); + XTS_MUL_ALPHA8_YMM(T0, T4); + XTS_MUL_ALPHA8_YMM(T1, T5); + XTS_MUL_ALPHA8_YMM(T2, T6); + XTS_MUL_ALPHA8_YMM(T3, T7); + + for(;;) + { + c0 = _mm256_xor_si256( T0, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 0 ) ) ); + c1 = _mm256_xor_si256( T1, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 2*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c2 = _mm256_xor_si256( T2, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 4*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c3 = _mm256_xor_si256( T3, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 6*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c4 = _mm256_xor_si256( T4, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 8*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c5 = _mm256_xor_si256( T5, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 10*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c6 = _mm256_xor_si256( T6, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 12*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + c7 = _mm256_xor_si256( T7, _mm256_loadu_si256( ( __m256i * ) ( pbSrc + 
14*SYMCRYPT_AES_BLOCK_SIZE ) ) ); + + pbSrc += 16 * SYMCRYPT_AES_BLOCK_SIZE; + + AES_DECRYPT_YMM_2048( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 0 ), _mm256_xor_si256( c0, T0 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 2*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c1, T1 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 4*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c2, T2 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 6*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c3, T3 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 8*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c4, T4 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 10*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c5, T5 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 12*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c6, T6 ) ); + _mm256_storeu_si256( ( __m256i * ) ( pbDst + 14*SYMCRYPT_AES_BLOCK_SIZE ), _mm256_xor_si256( c7, T7 ) ); + + pbDst += 16 * SYMCRYPT_AES_BLOCK_SIZE; + + cbDataMain -= 16 * SYMCRYPT_AES_BLOCK_SIZE; + if( cbDataMain < 16 * SYMCRYPT_AES_BLOCK_SIZE ) + { + break; + } + + XTS_MUL_ALPHA16_YMM(T0, T0); + XTS_MUL_ALPHA16_YMM(T1, T1); + XTS_MUL_ALPHA16_YMM(T2, T2); + XTS_MUL_ALPHA16_YMM(T3, T3); + XTS_MUL_ALPHA16_YMM(T4, T4); + XTS_MUL_ALPHA16_YMM(T5, T5); + XTS_MUL_ALPHA16_YMM(T6, T6); + XTS_MUL_ALPHA16_YMM(T7, T7); + } + + // We won't do another 16-block set so we don't update the tweak blocks + + if( cbDataTail > 0 ) + { + // + // This is a rare case: the data unit length is not a multiple of 256 bytes. + // We do this in the Xmm implementation. 
+ // Fix up the tweak block first + // + t7 = _mm256_extracti128_si256 ( T7, 1 /* Highest 128 bits */ ); // AVX2 + _mm256_zeroupper(); + XTS_MUL_ALPHA( t7, t0 ); + _mm_storeu_si128( (__m128i *) pbTweakBlock, t0 ); + + SymCryptXtsAesDecryptDataUnitXmm( pExpandedKey, pbTweakBlock, pbScratch, pbSrc, pbDst, cbDataTail ); + } + else { + _mm256_zeroupper(); + } +} + +#define AES_FULLROUND_16_GHASH_2_Ymm( roundkeys, keyPtr, c0, c1, c2, c3, c4, c5, c6, c7, r0, t0, t1, gHashPointer, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ) \ +{ \ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); \ + keyPtr ++; \ + c0 = _mm256_aesenc_epi128( c0, roundkeys ); \ + c1 = _mm256_aesenc_epi128( c1, roundkeys ); \ + c2 = _mm256_aesenc_epi128( c2, roundkeys ); \ + c3 = _mm256_aesenc_epi128( c3, roundkeys ); \ + c4 = _mm256_aesenc_epi128( c4, roundkeys ); \ + c5 = _mm256_aesenc_epi128( c5, roundkeys ); \ + c6 = _mm256_aesenc_epi128( c6, roundkeys ); \ + c7 = _mm256_aesenc_epi128( c7, roundkeys ); \ +\ + r0 = _mm256_loadu_si256( (__m256i *) gHashPointer ); \ + r0 = _mm256_shuffle_epi8( r0, byteReverseOrder ); \ + gHashPointer += 32; \ +\ + t1 = _mm256_loadu_si256( (__m256i *) &GHASH_H_POWER(gHashExpandedKeyTable, todo) ); \ + t0 = _mm256_clmulepi64_epi128( r0, t1, 0x00 ); \ + t1 = _mm256_clmulepi64_epi128( r0, t1, 0x11 ); \ +\ + resl = _mm256_xor_si256( resl, t0 ); \ + resh = _mm256_xor_si256( resh, t1 ); \ +\ + t0 = _mm256_srli_si256( r0, 8 ); \ + r0 = _mm256_xor_si256( r0, t0 ); \ + t1 = _mm256_loadu_si256( (__m256i *) &GHASH_Hx_POWER(gHashExpandedKeyTable, todo) ); \ + t1 = _mm256_clmulepi64_epi128( r0, t1, 0x00 ); \ +\ + resm = _mm256_xor_si256( resm, t1 ); \ + todo -= 2; \ +}; + +#define AES_GCM_ENCRYPT_16_Ymm( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, gHashPointer, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ) \ +{ \ + const BYTE (*keyPtr)[4][4]; \ + const BYTE (*keyLimit)[4][4]; \ + __m256i roundkeys; \ + __m256i t0, t1; 
\ + __m256i r0; \ + int aesEncryptGhashLoop; \ +\ + keyPtr = pExpandedKey->RoundKey; \ + keyLimit = pExpandedKey->lastEncRoundKey; \ +\ + /* _mm256_broadcastsi128_si256 requires AVX2 */ \ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); \ + keyPtr ++; \ +\ + /* _mm256_xor_si256 requires AVX2 */ \ + c0 = _mm256_xor_si256( c0, roundkeys ); \ + c1 = _mm256_xor_si256( c1, roundkeys ); \ + c2 = _mm256_xor_si256( c2, roundkeys ); \ + c3 = _mm256_xor_si256( c3, roundkeys ); \ + c4 = _mm256_xor_si256( c4, roundkeys ); \ + c5 = _mm256_xor_si256( c5, roundkeys ); \ + c6 = _mm256_xor_si256( c6, roundkeys ); \ + c7 = _mm256_xor_si256( c7, roundkeys ); \ +\ + /* Do 8(x2) full rounds (AES-128|AES-192|AES-256) with stitched GHASH */ \ + for( aesEncryptGhashLoop = 0; aesEncryptGhashLoop < 4; aesEncryptGhashLoop++ ) \ + { \ + AES_FULLROUND_16_GHASH_2_Ymm( roundkeys, keyPtr, c0, c1, c2, c3, c4, c5, c6, c7, r0, t0, t1, gHashPointer, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ); \ + AES_FULLROUND_16_GHASH_2_Ymm( roundkeys, keyPtr, c0, c1, c2, c3, c4, c5, c6, c7, r0, t0, t1, gHashPointer, byteReverseOrder, gHashExpandedKeyTable, todo, resl, resm, resh ); \ + } \ +\ + do \ + { \ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); \ + keyPtr ++; \ + c0 = _mm256_aesenc_epi128( c0, roundkeys ); \ + c1 = _mm256_aesenc_epi128( c1, roundkeys ); \ + c2 = _mm256_aesenc_epi128( c2, roundkeys ); \ + c3 = _mm256_aesenc_epi128( c3, roundkeys ); \ + c4 = _mm256_aesenc_epi128( c4, roundkeys ); \ + c5 = _mm256_aesenc_epi128( c5, roundkeys ); \ + c6 = _mm256_aesenc_epi128( c6, roundkeys ); \ + c7 = _mm256_aesenc_epi128( c7, roundkeys ); \ + } while( keyPtr < keyLimit ); \ +\ + roundkeys = _mm256_broadcastsi128_si256( *( (const __m128i *) keyPtr ) ); \ +\ + c0 = _mm256_aesenclast_epi128( c0, roundkeys ); \ + c1 = _mm256_aesenclast_epi128( c1, roundkeys ); \ + c2 = _mm256_aesenclast_epi128( c2, roundkeys ); \ + c3 = 
_mm256_aesenclast_epi128( c3, roundkeys ); \ + c4 = _mm256_aesenclast_epi128( c4, roundkeys ); \ + c5 = _mm256_aesenclast_epi128( c5, roundkeys ); \ + c6 = _mm256_aesenclast_epi128( c6, roundkeys ); \ + c7 = _mm256_aesenclast_epi128( c7, roundkeys ); \ +}; + +VOID +SYMCRYPT_CALL +SymCryptAesGcmEncryptStitchedYmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __m128i chain = _mm_loadu_si128( (__m128i *) pbChainingValue ); + + __m128i BYTE_REVERSE_ORDER_xmm = _mm_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ); + __m256i BYTE_REVERSE_ORDER = _mm256_set_epi64x( 0x0001020304050607, 0x08090a0b0c0d0e0f, 0x0001020304050607, 0x08090a0b0c0d0e0f ); + __m128i vMultiplicationConstant = _mm_set_epi32( 0, 0, 0xc2000000, 0 ); + + __m256i chainIncrementUpper1 = _mm256_set_epi64x( 0, 1, 0, 0 ); + __m256i chainIncrement2 = _mm256_set_epi64x( 0, 2, 0, 2 ); + __m256i chainIncrement4 = _mm256_set_epi64x( 0, 4, 0, 4 ); + __m256i chainIncrement16 = _mm256_set_epi64x( 0, 16, 0, 16 ); + + __m256i ctr0, ctr1, ctr2, ctr3, ctr4, ctr5, ctr6, ctr7; + __m256i c0, c1, c2, c3, c4, c5, c6, c7; + __m256i r0, r1, r2, r3, r4, r5, r6, r7; + __m256i Hi, Hix; + + __m128i state; + __m128i a0_xmm, a1_xmm, a2_xmm; + __m256i a0, a1, a2; + SIZE_T nBlocks = cbData / SYMCRYPT_GF128_BLOCK_SIZE; + SIZE_T todo; + PCBYTE pbGhashSrc = pbDst; + + SYMCRYPT_ASSERT( (cbData & SYMCRYPT_GCM_BLOCK_MOD_MASK) == 0 ); // cbData is multiple of block size + SYMCRYPT_ASSERT( nBlocks >= GCM_YMM_MINBLOCKS ); + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ) & ~(GCM_YMM_MINBLOCKS-1); + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER_xmm ); + + state = _mm_loadu_si128( (__m128i *) pState ); + ctr0 = 
_mm256_insertf128_si256( _mm256_castsi128_si256( chain ), chain, 1); // AVX + ctr0 = _mm256_add_epi32( ctr0, chainIncrementUpper1 ); + ctr1 = _mm256_add_epi32( ctr0, chainIncrement2 ); + ctr2 = _mm256_add_epi32( ctr0, chainIncrement4 ); + ctr3 = _mm256_add_epi32( ctr1, chainIncrement4 ); + ctr4 = _mm256_add_epi32( ctr2, chainIncrement4 ); + ctr5 = _mm256_add_epi32( ctr3, chainIncrement4 ); + ctr6 = _mm256_add_epi32( ctr4, chainIncrement4 ); + ctr7 = _mm256_add_epi32( ctr5, chainIncrement4 ); + + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0_xmm, a1_xmm, a2_xmm ); + a0 = a1 = a2 = _mm256_setzero_si256(); + + c0 = _mm256_shuffle_epi8( ctr0, BYTE_REVERSE_ORDER ); + c1 = _mm256_shuffle_epi8( ctr1, BYTE_REVERSE_ORDER ); + c2 = _mm256_shuffle_epi8( ctr2, BYTE_REVERSE_ORDER ); + c3 = _mm256_shuffle_epi8( ctr3, BYTE_REVERSE_ORDER ); + c4 = _mm256_shuffle_epi8( ctr4, BYTE_REVERSE_ORDER ); + c5 = _mm256_shuffle_epi8( ctr5, BYTE_REVERSE_ORDER ); + c6 = _mm256_shuffle_epi8( ctr6, BYTE_REVERSE_ORDER ); + c7 = _mm256_shuffle_epi8( ctr7, BYTE_REVERSE_ORDER ); + + ctr0 = _mm256_add_epi32( ctr0, chainIncrement16 ); + ctr1 = _mm256_add_epi32( ctr1, chainIncrement16 ); + ctr2 = _mm256_add_epi32( ctr2, chainIncrement16 ); + ctr3 = _mm256_add_epi32( ctr3, chainIncrement16 ); + ctr4 = _mm256_add_epi32( ctr4, chainIncrement16 ); + ctr5 = _mm256_add_epi32( ctr5, chainIncrement16 ); + ctr6 = _mm256_add_epi32( ctr6, chainIncrement16 ); + ctr7 = _mm256_add_epi32( ctr7, chainIncrement16 ); + + AES_ENCRYPT_YMM_2048( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7 ); + + _mm256_storeu_si256( (__m256i *) (pbDst + 0), _mm256_xor_si256( c0, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 0) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst + 32), _mm256_xor_si256( c1, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 32) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst + 64), _mm256_xor_si256( c2, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 64) ) ) ); + 
_mm256_storeu_si256( (__m256i *) (pbDst + 96), _mm256_xor_si256( c3, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 96) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +128), _mm256_xor_si256( c4, _mm256_loadu_si256( ( __m256i * ) (pbSrc +128) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +160), _mm256_xor_si256( c5, _mm256_loadu_si256( ( __m256i * ) (pbSrc +160) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +192), _mm256_xor_si256( c6, _mm256_loadu_si256( ( __m256i * ) (pbSrc +192) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +224), _mm256_xor_si256( c7, _mm256_loadu_si256( ( __m256i * ) (pbSrc +224) ) ) ); + + pbDst += 16 * SYMCRYPT_AES_BLOCK_SIZE; + pbSrc += 16 * SYMCRYPT_AES_BLOCK_SIZE; + + while( nBlocks >= 2*GCM_YMM_MINBLOCKS ) + { + c0 = _mm256_shuffle_epi8( ctr0, BYTE_REVERSE_ORDER ); + c1 = _mm256_shuffle_epi8( ctr1, BYTE_REVERSE_ORDER ); + c2 = _mm256_shuffle_epi8( ctr2, BYTE_REVERSE_ORDER ); + c3 = _mm256_shuffle_epi8( ctr3, BYTE_REVERSE_ORDER ); + c4 = _mm256_shuffle_epi8( ctr4, BYTE_REVERSE_ORDER ); + c5 = _mm256_shuffle_epi8( ctr5, BYTE_REVERSE_ORDER ); + c6 = _mm256_shuffle_epi8( ctr6, BYTE_REVERSE_ORDER ); + c7 = _mm256_shuffle_epi8( ctr7, BYTE_REVERSE_ORDER ); + + ctr0 = _mm256_add_epi32( ctr0, chainIncrement16 ); + ctr1 = _mm256_add_epi32( ctr1, chainIncrement16 ); + ctr2 = _mm256_add_epi32( ctr2, chainIncrement16 ); + ctr3 = _mm256_add_epi32( ctr3, chainIncrement16 ); + ctr4 = _mm256_add_epi32( ctr4, chainIncrement16 ); + ctr5 = _mm256_add_epi32( ctr5, chainIncrement16 ); + ctr6 = _mm256_add_epi32( ctr6, chainIncrement16 ); + ctr7 = _mm256_add_epi32( ctr7, chainIncrement16 ); + + AES_GCM_ENCRYPT_16_Ymm( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pbGhashSrc, BYTE_REVERSE_ORDER, expandedKeyTable, todo, a0, a1, a2 ); + + _mm256_storeu_si256( (__m256i *) (pbDst + 0), _mm256_xor_si256( c0, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 0) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst + 32), _mm256_xor_si256( c1, _mm256_loadu_si256( ( __m256i 
* ) (pbSrc + 32) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst + 64), _mm256_xor_si256( c2, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 64) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst + 96), _mm256_xor_si256( c3, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 96) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +128), _mm256_xor_si256( c4, _mm256_loadu_si256( ( __m256i * ) (pbSrc +128) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +160), _mm256_xor_si256( c5, _mm256_loadu_si256( ( __m256i * ) (pbSrc +160) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +192), _mm256_xor_si256( c6, _mm256_loadu_si256( ( __m256i * ) (pbSrc +192) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +224), _mm256_xor_si256( c7, _mm256_loadu_si256( ( __m256i * ) (pbSrc +224) ) ) ); + + pbDst += 16 * SYMCRYPT_AES_BLOCK_SIZE; + pbSrc += 16 * SYMCRYPT_AES_BLOCK_SIZE; + nBlocks -= 16; + + if ( todo == 0 ) + { + a0_xmm = _mm_xor_si128( a0_xmm, _mm256_extracti128_si256 ( a0, 0 /* Lowest 128 bits */ )); + a1_xmm = _mm_xor_si128( a1_xmm, _mm256_extracti128_si256 ( a1, 0 /* Lowest 128 bits */ )); + a2_xmm = _mm_xor_si128( a2_xmm, _mm256_extracti128_si256 ( a2, 0 /* Lowest 128 bits */ )); + + a0_xmm = _mm_xor_si128( a0_xmm, _mm256_extracti128_si256 ( a0, 1 /* Highest 128 bits */ )); + a1_xmm = _mm_xor_si128( a1_xmm, _mm256_extracti128_si256 ( a1, 1 /* Highest 128 bits */ )); + a2_xmm = _mm_xor_si128( a2_xmm, _mm256_extracti128_si256 ( a2, 1 /* Highest 128 bits */ )); + CLMUL_3_POST( a0_xmm, a1_xmm, a2_xmm ); + MODREDUCE( vMultiplicationConstant, a0_xmm, a1_xmm, a2_xmm, state ); + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ) & ~(GCM_YMM_MINBLOCKS-1); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0_xmm, a1_xmm, a2_xmm ); + a0 = a1 = a2 = _mm256_setzero_si256(); + } + } + + r0 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) (pbGhashSrc + 0) ), BYTE_REVERSE_ORDER ); + r1 = _mm256_shuffle_epi8( _mm256_loadu_si256( 
(__m256i *) (pbGhashSrc + 32) ), BYTE_REVERSE_ORDER ); + r2 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) (pbGhashSrc + 64) ), BYTE_REVERSE_ORDER ); + r3 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) (pbGhashSrc + 96) ), BYTE_REVERSE_ORDER ); + r4 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) (pbGhashSrc +128) ), BYTE_REVERSE_ORDER ); + r5 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) (pbGhashSrc +160) ), BYTE_REVERSE_ORDER ); + r6 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) (pbGhashSrc +192) ), BYTE_REVERSE_ORDER ); + r7 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) (pbGhashSrc +224) ), BYTE_REVERSE_ORDER ); + + Hi = _mm256_loadu_si256( (__m256i *) &GHASH_H_POWER(expandedKeyTable, todo - 0) ); + Hix = _mm256_loadu_si256( (__m256i *) &GHASH_Hx_POWER(expandedKeyTable, todo - 0) ); + CLMUL_ACC_3_Ymm( r0, Hi, Hix, a0, a1, a2 ); + Hi = _mm256_loadu_si256( (__m256i *) &GHASH_H_POWER(expandedKeyTable, todo - 2) ); + Hix = _mm256_loadu_si256( (__m256i *) &GHASH_Hx_POWER(expandedKeyTable, todo - 2) ); + CLMUL_ACC_3_Ymm( r1, Hi, Hix, a0, a1, a2 ); + Hi = _mm256_loadu_si256( (__m256i *) &GHASH_H_POWER(expandedKeyTable, todo - 4) ); + Hix = _mm256_loadu_si256( (__m256i *) &GHASH_Hx_POWER(expandedKeyTable, todo - 4) ); + CLMUL_ACC_3_Ymm( r2, Hi, Hix, a0, a1, a2 ); + Hi = _mm256_loadu_si256( (__m256i *) &GHASH_H_POWER(expandedKeyTable, todo - 6) ); + Hix = _mm256_loadu_si256( (__m256i *) &GHASH_Hx_POWER(expandedKeyTable, todo - 6) ); + CLMUL_ACC_3_Ymm( r3, Hi, Hix, a0, a1, a2 ); + Hi = _mm256_loadu_si256( (__m256i *) &GHASH_H_POWER(expandedKeyTable, todo - 8) ); + Hix = _mm256_loadu_si256( (__m256i *) &GHASH_Hx_POWER(expandedKeyTable, todo - 8) ); + CLMUL_ACC_3_Ymm( r4, Hi, Hix, a0, a1, a2 ); + Hi = _mm256_loadu_si256( (__m256i *) &GHASH_H_POWER(expandedKeyTable, todo -10) ); + Hix = _mm256_loadu_si256( (__m256i *) &GHASH_Hx_POWER(expandedKeyTable, todo -10) ); + CLMUL_ACC_3_Ymm( r5, Hi, Hix, a0, a1, a2 ); + Hi = 
_mm256_loadu_si256( (__m256i *) &GHASH_H_POWER(expandedKeyTable, todo -12) ); + Hix = _mm256_loadu_si256( (__m256i *) &GHASH_Hx_POWER(expandedKeyTable, todo -12) ); + CLMUL_ACC_3_Ymm( r6, Hi, Hix, a0, a1, a2 ); + Hi = _mm256_loadu_si256( (__m256i *) &GHASH_H_POWER(expandedKeyTable, todo -14) ); + Hix = _mm256_loadu_si256( (__m256i *) &GHASH_Hx_POWER(expandedKeyTable, todo -14) ); + CLMUL_ACC_3_Ymm( r7, Hi, Hix, a0, a1, a2 ); + + a0_xmm = _mm_xor_si128( a0_xmm, _mm256_extracti128_si256 ( a0, 0 /* Lowest 128 bits */ )); + a1_xmm = _mm_xor_si128( a1_xmm, _mm256_extracti128_si256 ( a1, 0 /* Lowest 128 bits */ )); + a2_xmm = _mm_xor_si128( a2_xmm, _mm256_extracti128_si256 ( a2, 0 /* Lowest 128 bits */ )); + + a0_xmm = _mm_xor_si128( a0_xmm, _mm256_extracti128_si256 ( a0, 1 /* Highest 128 bits */ )); + a1_xmm = _mm_xor_si128( a1_xmm, _mm256_extracti128_si256 ( a1, 1 /* Highest 128 bits */ )); + a2_xmm = _mm_xor_si128( a2_xmm, _mm256_extracti128_si256 ( a2, 1 /* Highest 128 bits */ )); + CLMUL_3_POST( a0_xmm, a1_xmm, a2_xmm ); + MODREDUCE( vMultiplicationConstant, a0_xmm, a1_xmm, a2_xmm, state ); + + chain = _mm256_extracti128_si256 ( ctr0, 0 /* Lowest 128 bits */ ); + _mm256_zeroupper(); + + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER_xmm ); + _mm_storeu_si128((__m128i *) pbChainingValue, chain ); + _mm_storeu_si128((__m128i *) pState, state ); + + cbData &= ( GCM_YMM_MINBLOCKS*SYMCRYPT_AES_BLOCK_SIZE ) - 1; + SYMCRYPT_ASSERT( cbData == (nBlocks-16)*SYMCRYPT_AES_BLOCK_SIZE ); + if ( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptAesGcmEncryptStitchedXmm( pExpandedKey, pbChainingValue, expandedKeyTable, pState, pbSrc, pbDst, cbData); + } +} + +VOID +SYMCRYPT_CALL +SymCryptAesGcmDecryptStitchedYmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) 
PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + __m128i chain = _mm_loadu_si128( (__m128i *) pbChainingValue ); + + __m128i BYTE_REVERSE_ORDER_xmm = _mm_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ); + __m256i BYTE_REVERSE_ORDER = _mm256_set_epi64x( 0x0001020304050607, 0x08090a0b0c0d0e0f, 0x0001020304050607, 0x08090a0b0c0d0e0f ); + __m128i vMultiplicationConstant = _mm_set_epi32( 0, 0, 0xc2000000, 0 ); + + __m256i chainIncrementUpper1 = _mm256_set_epi64x( 0, 1, 0, 0 ); + __m256i chainIncrement2 = _mm256_set_epi64x( 0, 2, 0, 2 ); + __m256i chainIncrement4 = _mm256_set_epi64x( 0, 4, 0, 4 ); + __m256i chainIncrement16 = _mm256_set_epi64x( 0, 16, 0, 16 ); + + __m256i ctr0, ctr1, ctr2, ctr3, ctr4, ctr5, ctr6, ctr7; + __m256i c0, c1, c2, c3, c4, c5, c6, c7; + + __m128i state; + __m128i a0_xmm, a1_xmm, a2_xmm; + __m256i a0, a1, a2; + SIZE_T nBlocks = cbData / SYMCRYPT_GF128_BLOCK_SIZE; + SIZE_T todo; + PCBYTE pbGhashSrc = pbSrc; + + SYMCRYPT_ASSERT( (cbData & SYMCRYPT_GCM_BLOCK_MOD_MASK) == 0 ); // cbData is multiple of block size + SYMCRYPT_ASSERT( nBlocks >= GCM_YMM_MINBLOCKS ); + + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ) & ~(GCM_YMM_MINBLOCKS-1); + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER_xmm ); + + state = _mm_loadu_si128( (__m128i *) pState ); + ctr0 = _mm256_insertf128_si256( _mm256_castsi128_si256( chain ), chain, 1); // AVX + ctr0 = _mm256_add_epi32( ctr0, chainIncrementUpper1 ); + ctr1 = _mm256_add_epi32( ctr0, chainIncrement2 ); + ctr2 = _mm256_add_epi32( ctr0, chainIncrement4 ); + ctr3 = _mm256_add_epi32( ctr1, chainIncrement4 ); + ctr4 = _mm256_add_epi32( ctr2, chainIncrement4 ); + ctr5 = _mm256_add_epi32( ctr3, chainIncrement4 ); + ctr6 = _mm256_add_epi32( ctr4, chainIncrement4 ); + ctr7 = _mm256_add_epi32( ctr5, chainIncrement4 ); + + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0_xmm, a1_xmm, a2_xmm ); + a0 = a1 = a2 = 
_mm256_setzero_si256(); + + while( nBlocks >= GCM_YMM_MINBLOCKS ) + { + c0 = _mm256_shuffle_epi8( ctr0, BYTE_REVERSE_ORDER ); + c1 = _mm256_shuffle_epi8( ctr1, BYTE_REVERSE_ORDER ); + c2 = _mm256_shuffle_epi8( ctr2, BYTE_REVERSE_ORDER ); + c3 = _mm256_shuffle_epi8( ctr3, BYTE_REVERSE_ORDER ); + c4 = _mm256_shuffle_epi8( ctr4, BYTE_REVERSE_ORDER ); + c5 = _mm256_shuffle_epi8( ctr5, BYTE_REVERSE_ORDER ); + c6 = _mm256_shuffle_epi8( ctr6, BYTE_REVERSE_ORDER ); + c7 = _mm256_shuffle_epi8( ctr7, BYTE_REVERSE_ORDER ); + + ctr0 = _mm256_add_epi32( ctr0, chainIncrement16 ); + ctr1 = _mm256_add_epi32( ctr1, chainIncrement16 ); + ctr2 = _mm256_add_epi32( ctr2, chainIncrement16 ); + ctr3 = _mm256_add_epi32( ctr3, chainIncrement16 ); + ctr4 = _mm256_add_epi32( ctr4, chainIncrement16 ); + ctr5 = _mm256_add_epi32( ctr5, chainIncrement16 ); + ctr6 = _mm256_add_epi32( ctr6, chainIncrement16 ); + ctr7 = _mm256_add_epi32( ctr7, chainIncrement16 ); + + AES_GCM_ENCRYPT_16_Ymm( pExpandedKey, c0, c1, c2, c3, c4, c5, c6, c7, pbGhashSrc, BYTE_REVERSE_ORDER, expandedKeyTable, todo, a0, a1, a2 ); + + _mm256_storeu_si256( (__m256i *) (pbDst + 0), _mm256_xor_si256( c0, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 0) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst + 32), _mm256_xor_si256( c1, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 32) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst + 64), _mm256_xor_si256( c2, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 64) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst + 96), _mm256_xor_si256( c3, _mm256_loadu_si256( ( __m256i * ) (pbSrc + 96) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +128), _mm256_xor_si256( c4, _mm256_loadu_si256( ( __m256i * ) (pbSrc +128) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +160), _mm256_xor_si256( c5, _mm256_loadu_si256( ( __m256i * ) (pbSrc +160) ) ) ); + _mm256_storeu_si256( (__m256i *) (pbDst +192), _mm256_xor_si256( c6, _mm256_loadu_si256( ( __m256i * ) (pbSrc +192) ) ) ); + _mm256_storeu_si256( (__m256i *) 
(pbDst +224), _mm256_xor_si256( c7, _mm256_loadu_si256( ( __m256i * ) (pbSrc +224) ) ) ); + + pbDst += 16 * SYMCRYPT_AES_BLOCK_SIZE; + pbSrc += 16 * SYMCRYPT_AES_BLOCK_SIZE; + nBlocks -= 16; + + if ( todo == 0 ) + { + a0_xmm = _mm_xor_si128( a0_xmm, _mm256_extracti128_si256 ( a0, 0 /* Lowest 128 bits */ )); + a1_xmm = _mm_xor_si128( a1_xmm, _mm256_extracti128_si256 ( a1, 0 /* Lowest 128 bits */ )); + a2_xmm = _mm_xor_si128( a2_xmm, _mm256_extracti128_si256 ( a2, 0 /* Lowest 128 bits */ )); + + a0_xmm = _mm_xor_si128( a0_xmm, _mm256_extracti128_si256 ( a0, 1 /* Highest 128 bits */ )); + a1_xmm = _mm_xor_si128( a1_xmm, _mm256_extracti128_si256 ( a1, 1 /* Highest 128 bits */ )); + a2_xmm = _mm_xor_si128( a2_xmm, _mm256_extracti128_si256 ( a2, 1 /* Highest 128 bits */ )); + CLMUL_3_POST( a0_xmm, a1_xmm, a2_xmm ); + MODREDUCE( vMultiplicationConstant, a0_xmm, a1_xmm, a2_xmm, state ); + + if ( nBlocks > 0 ) + { + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ) & ~(GCM_YMM_MINBLOCKS-1); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0_xmm, a1_xmm, a2_xmm ); + a0 = a1 = a2 = _mm256_setzero_si256(); + } + } + } + + chain = _mm256_extracti128_si256 ( ctr0, 0 /* Lowest 128 bits */ ); + _mm256_zeroupper(); + + chain = _mm_shuffle_epi8( chain, BYTE_REVERSE_ORDER_xmm ); + _mm_storeu_si128((__m128i *) pbChainingValue, chain ); + _mm_storeu_si128((__m128i *) pState, state ); + + cbData &= ( GCM_YMM_MINBLOCKS*SYMCRYPT_AES_BLOCK_SIZE ) - 1; + SYMCRYPT_ASSERT( cbData == nBlocks*SYMCRYPT_AES_BLOCK_SIZE ); + if ( cbData >= SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptAesGcmDecryptStitchedXmm( pExpandedKey, pbChainingValue, expandedKeyTable, pState, pbSrc, pbDst, cbData); + } +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // CPU_X86 | CPU_AMD64 diff --git a/libs/symcrypt/lib/aesCtrDrbg.c b/libs/symcrypt/lib/aesCtrDrbg.c new file mode 100644 index 00000000000..457e3f7fcd9 
--- /dev/null +++ b/libs/symcrypt/lib/aesCtrDrbg.c @@ -0,0 +1,986 @@ +// +// aesCtrDrbg.c code for SP 800-90 AES-CTR-DRBG implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This code is derived from the implementation already in use in CNG. +// + +#include "precomp.h" + +#define SYMCRYPT_RNG_AES_KEY_SIZE (32) +#define SYMCRYPT_RNG_AES_KEY_AND_V_SIZE (32 + 16) +#define SYMCRYPT_RNG_AES_MAX_REQUEST_SIZE (1<<16) +#define SYMCRYPT_RNG_AES_MAX_REQUESTS_PER_RESEED ((UINT64)1<<48) + +VOID +SYMCRYPT_CALL +SymCryptRngAesBcc( + _In_ PSYMCRYPT_AES_EXPANDED_KEY pKey, + _In_reads_( cbData ) PCBYTE pcbData, + _In_ SIZE_T cbData, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbResult ) +{ + // + //Length of input should always be multiple of the AES block size + // + SYMCRYPT_ASSERT(cbData % SYMCRYPT_AES_BLOCK_SIZE == 0); + + SymCryptWipe( pbResult, SYMCRYPT_AES_BLOCK_SIZE ); + + SymCryptAesCbcMac( pKey, pbResult, pcbData, cbData ); +} + + +VOID +SYMCRYPT_CALL +SymCryptRngAesDf( + _In_reads_(cbData) PCBYTE pcbData, + _In_ SIZE_T cbData, + _Out_writes_(SYMCRYPT_RNG_AES_INTERNAL_SEED_SIZE) PBYTE pbSeed ) +{ + //maximal input length + IV + padding + SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_RNG_AES_MAX_SEED_SIZE + 3 * SYMCRYPT_AES_BLOCK_SIZE]; + PBYTE pb; + SIZE_T lenIvS; + + SYMCRYPT_ALIGN BYTE temp[SYMCRYPT_RNG_AES_KEY_AND_V_SIZE]; + SYMCRYPT_AES_EXPANDED_KEY aesKey; + PBYTE pX; + + SIZE_T i; + + C_ASSERT( sizeof( temp ) % SYMCRYPT_AES_BLOCK_SIZE == 0 ); + + // + // See SP800-90 section 10.4.2 + // + // Our buf contains the following data: + // - 16 bytes IV + // - 4 bytes L + // - 4 bytes N + // - up to SEEDLEN bytes input data + // - 1 byte 0x80 + // - zeroes to fill to a multiple of 16 + // + + SYMCRYPT_ASSERT( cbData >= SYMCRYPT_RNG_AES_MIN_RESEED_SIZE && + cbData <= SYMCRYPT_RNG_AES_MAX_SEED_SIZE ); + + // + // Initialize the entire buf to zero + // + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + + // + // build the string S in 
buf[16...] + // + pb = &buf[ SYMCRYPT_AES_BLOCK_SIZE ]; + + // + // Set L; SP800-90 isn't clear, but we'll use MSB first as that is what is used elsewhere. + // + SYMCRYPT_STORE_MSBFIRST32( pb, (UINT32) cbData ); + pb += 4; + + // + // Set N + // + SYMCRYPT_STORE_MSBFIRST32( pb, SYMCRYPT_RNG_AES_INTERNAL_SEED_SIZE ); + pb += 4; + + // + // Set input_string + // + + memcpy( pb, pcbData, cbData ); + pb += cbData; + + // + // set padding + // + *pb++ = 0x80; + + while( (pb - buf) % SYMCRYPT_AES_BLOCK_SIZE != 0 ) + { +#pragma prefast( suppress: 26015, "Logic why this doesn't overflow the buf[] array is too complicated for prefast" ) + *pb++ = 0; + } + + lenIvS = pb - buf; // Length of IV & S together + + // + // Set up the initial key + // + + for( i = 0; i < SYMCRYPT_RNG_AES_KEY_SIZE; i++ ) + { + temp[i] = (BYTE) i; + } + SymCryptAesExpandKeyEncryptOnly( &aesKey, temp, SYMCRYPT_RNG_AES_KEY_SIZE ); + + + // + // Produce the 'temp' intermediate result. + // + + for( i=0; i< SYMCRYPT_RNG_AES_KEY_AND_V_SIZE / SYMCRYPT_AES_BLOCK_SIZE; i++ ) + { + // + // Update the IV with the right i value. 
+ // i is only 0-2, so we only have to set a single byte + // + buf[3] = (BYTE) i; + + // + // Now we perform the BCC function, which is just CbcMac + // BCC(K,(IV||S)) + SymCryptRngAesBcc( &aesKey, buf, lenIvS, &temp[ i * SYMCRYPT_AES_BLOCK_SIZE ] ); + } + + // + // Second phase, produce the actual output + // + SymCryptAesExpandKeyEncryptOnly( &aesKey, temp, SYMCRYPT_RNG_AES_KEY_SIZE ); + pX = &temp[SYMCRYPT_RNG_AES_KEY_SIZE]; + + for( i=0; i < SYMCRYPT_RNG_AES_INTERNAL_SEED_SIZE; i += SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptAesEncrypt( &aesKey, pX, pX ); + memcpy( &pbSeed[ i ], pX, SYMCRYPT_AES_BLOCK_SIZE ); + } + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + SymCryptWipeKnownSize( temp, sizeof( temp ) ); + SymCryptWipeKnownSize( &aesKey, sizeof( aesKey ) ); +} + + +VOID +SYMCRYPT_CALL +SymCryptRngAesGenerateBlocks( + _In_ PSYMCRYPT_AES_EXPANDED_KEY pAesKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pV, + _Out_writes_(cbRandom) PBYTE pbRandom, + _In_ SIZE_T cbRandom ) +// +// Internal function to generate output blocks from the state. +// cbRandom must be a multiple of the block size. +// +{ + UINT64 v; + SIZE_T cBlocks; + SIZE_T blocksToDo; + SIZE_T bytesToDo; + +// +// The roll-over of the counter is hard to test, especially since our +// NIST test vectors only cover small outputs. +// We have an option to test the output against a simpler (older) implementation +// to validate the proper working of the code. +// +#define TEST_AGAINST_OLD_CODE 0 +#if TEST_AGAINST_OLD_CODE + BYTE Vcopy[16]; + BYTE buf[16]; + PCBYTE pbCheck = pbRandom; + SIZE_T cbCheck = cbRandom; + + memcpy( Vcopy, pV, 16 ); +#endif + + // + // cbRandom must be a multiple of BLOCK_LEN and > 0. + // + SYMCRYPT_ASSERT( (cbRandom & (SYMCRYPT_AES_BLOCK_SIZE-1)) == 0 ); + + cBlocks = cbRandom / SYMCRYPT_AES_BLOCK_SIZE; + + // + // We violate the write-once rule here by wiping the output buffer and then + // filling it with the CTR-mode encryption. 
+ // This is safe because the caller only learns the proper output anyway. + // + SymCryptWipe( pbRandom, cbRandom ); + + // + // This loop is a little complicated because we need to pre-increment the 128-bit value V + // and the SymCryptAesCtrMsb64 function does a 64-bit post-increment. + // + while( cBlocks != 0 ) + { + // Increment V + v = SYMCRYPT_LOAD_MSBFIRST64( &pV[8] ) + 1; + SYMCRYPT_STORE_MSBFIRST64( &pV[8], v ); + SYMCRYPT_STORE_MSBFIRST64( &pV[0], SYMCRYPT_LOAD_MSBFIRST64( &pV[0] ) + (v == 0) ); + + // + // The SymCryptAesCtrMsb64 routine will increment the last 64 bits of the V value, + // but not handle the carry to the first 64 bits. + // We limit how many blocks we do so that we never cross this boundary. + // SymCryptAesCtrMsb64 does a post-increment, so it may increment the last 64 bits + // to zero as long as we don't rely on the V value afterwards. + // As one-in-2^64 code is not testable, we terminate the Msb64 call earlier, and + // much earlier on CHKed builds. + // +#if SYMCRYPT_DEBUG +#define MAX_CTRMSB64_BLOCKS (1 << 3) // very small; overflow will be triggered by any reasonable test +#else +#define MAX_CTRMSB64_BLOCKS (1 << 10) // increase when we have this well-tested +#endif + // + // 1 + (~v & mask) is the value you can add to v so that the mask bits of the sum + // end up to be zero. It is in the range 1 .. 
mask+1 + // + blocksToDo = SYMCRYPT_MIN( cBlocks, 1 + ( (~v) & (MAX_CTRMSB64_BLOCKS - 1) ) ); + + bytesToDo = blocksToDo * SYMCRYPT_AES_BLOCK_SIZE; + SYMCRYPT_ASSERT( bytesToDo <= cbRandom ); + SymCryptAesCtrMsb64( pAesKey, &pV[0], pbRandom, pbRandom, bytesToDo ); + pbRandom += bytesToDo; + cbRandom -= bytesToDo; // only used for prefast assertions; optimized away in shipping code + cBlocks -= blocksToDo; + + // + // Post-decrement the V block to compensate for the post-increment of the Msb64 function + // + v += blocksToDo - 1; + SYMCRYPT_ASSERT( v != 0 ); + + SYMCRYPT_STORE_MSBFIRST64( &pV[8], v ); + // No need to carry to the first half of V here, it cannot happen + } + +#if TEST_AGAINST_OLD_CODE + // + // We tried to use the CtrMsb64 mode to generate the blocks, but that leads to + // a number of complications. + // The lack of carry means we end up with code paths that run once per 2^64 blocks + // or so, and that is very hard to test. + // Furthermore, CtrMsb64 uses post-increment, whereas AES-CTR_DRBG uses pre-increment. + // That adds sufficient extra complications and testing problems that we went back + // to the solution below. + // + + while( cbCheck != 0 ) + { + SYMCRYPT_ASSERT( cbCheck >= SYMCRYPT_AES_BLOCK_SIZE ); // Keep prefast happy + // + // Increment the 128-bit block V MSByte first. + // + v = SYMCRYPT_LOAD_MSBFIRST64( &Vcopy[8] ) + 1; + SYMCRYPT_STORE_MSBFIRST64( &Vcopy[8], v ); + if( v == 0 ) + { + // + // This almost never happens. + // Using an if() is not side-channel safe, but in this case + // the side channel does not reveal anything that actually hurts the + // security of the algorithm. + // + SYMCRYPT_STORE_MSBFIRST64( Vcopy, 1 + LOAD_MSBFIRST64( Vcopy ) ); + } + + SymCryptAesEncrypt( pAesKey, Vcopy, buf ); + if( memcmp( buf, pbCheck, 16 ) != 0 ) + { + SymCryptFatal( 'OLD?' 
); + } + pbCheck += SYMCRYPT_AES_BLOCK_SIZE; + cbCheck -= SYMCRYPT_AES_BLOCK_SIZE; + } +#endif +} + +FORCEINLINE +int +SymCryptRngAesAreBlocksIdentical( + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pSrc1, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pSrc2 ) +// +// return 1 if the blocks are identical, 0 if they are different. +// +{ + SYMCRYPT_UNALIGNED const SIZE_T * p1 = (SYMCRYPT_UNALIGNED const SIZE_T *) pSrc1; + SYMCRYPT_UNALIGNED const SIZE_T * p2 = (SYMCRYPT_UNALIGNED const SIZE_T *) pSrc2; + + SIZE_T tmp; + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM + + C_ASSERT( sizeof( SIZE_T ) == 4 ); + tmp = (p1[0] ^ p2[0]) | (p1[1] ^ p2[1]) | (p1[2] ^ p2[2]) | (p1[3] ^ p2[3]); + +#elif SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 + + C_ASSERT( sizeof( SIZE_T ) == 8 ); + tmp = (p1[0] ^ p2[0]) | (p1[1] ^ p2[1]); + +#else + + SIZE_T i; + + C_ASSERT( 16 % sizeof( SIZE_T ) == 0 ); + + tmp = 0; + for( i=0; i < 16/sizeof( SIZE_T ); i ++ ) + { + tmp |= p1[i] ^ p2[i]; + } + +#endif + + return tmp == 0; +} + + +VOID +SYMCRYPT_CALL +SymCryptRngAesCheckBlocksNotIdentical( + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbPreviousBlock, + _In_reads_( cbData ) PCBYTE pcbData, + SIZE_T cbData ) +{ + SIZE_T identical; + SIZE_T i; + + SYMCRYPT_ASSERT( ((cbData & 15) == 0) && cbData > 0 ); + + identical = SymCryptRngAesAreBlocksIdentical( pbPreviousBlock, pcbData ); + + for( i = SYMCRYPT_AES_BLOCK_SIZE; i < cbData; i += SYMCRYPT_AES_BLOCK_SIZE ) + { + SYMCRYPT_ASSERT( cbData >= i + SYMCRYPT_AES_BLOCK_SIZE ); + identical |= SymCryptRngAesAreBlocksIdentical( &pcbData[i-SYMCRYPT_AES_BLOCK_SIZE], &pcbData[ i ] ); + } + + memcpy( pbPreviousBlock, &pcbData[cbData - SYMCRYPT_AES_BLOCK_SIZE], SYMCRYPT_AES_BLOCK_SIZE ); + + // + // The structure of AES-CTR-DRBG makes it impossible for two consecutive blocks of a single request + // to be equal. The only way this could happen is if the first block of one request is the same as + // the last block of the previous request. 
But the probability of this happening is 2^{-128}. + // This never happens, so the whole check is technically useless. + // Nevertheless, it is required by FIPS 140-2, so we have to implement it, + // but we don't have to handle the error usefully in any way. + // (Trying to handle this error sensibly is far too complicated, and adds far more danger from code + // bugs than it is worth. It is much better to just treat it as a fatal occurrence.) + // + + if( identical ) + { + SymCryptFatal( 'acdi' ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptRngAesUpdate( + _Inout_ PSYMCRYPT_RNG_AES_STATE pState, + _In_reads_opt_( SYMCRYPT_RNG_AES_INTERNAL_SEED_SIZE ) PCBYTE pbProvidedData, + _In_opt_ PSYMCRYPT_AES_EXPANDED_KEY pAesKey) +// +// Implement the CTR_DRBG Update function. +// pbProvidedData is optional, but if provided must always be exactly seedlen bits. +// pAesKey is the already expanded key of the RngState. This is optional, and only has +// to be provided if the caller already has it. +// +{ + SYMCRYPT_AES_EXPANDED_KEY aesKey; + PSYMCRYPT_AES_EXPANDED_KEY pKey; + SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_AES_BLOCK_SIZE]; + + if(NULL == pAesKey) + { + SymCryptAesExpandKeyEncryptOnly( &aesKey, pState->keyAndV, SYMCRYPT_RNG_AES_KEY_SIZE ); + pKey = &aesKey; + } + else + { + pKey = pAesKey; + } + + // + // Copy the V value so that we can overwrite it safely. + // + + memcpy( buf, &pState->keyAndV[SYMCRYPT_RNG_AES_KEY_SIZE], sizeof( buf ) ); + + SymCryptRngAesGenerateBlocks( + pKey, + buf, // pV + pState->keyAndV, // pbRandom + sizeof( pState->keyAndV) ); // cbRandom + + if( pbProvidedData != NULL ) + { + // XOR provided data in + SymCryptXorBytes( pState->keyAndV, pbProvidedData, pState->keyAndV, SYMCRYPT_RNG_AES_INTERNAL_SEED_SIZE ); + } + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + + // + // Only wipe the key if necessary. 
+ // + if( pKey == &aesKey ) + { + SymCryptWipeKnownSize( &aesKey, sizeof( aesKey )); + } +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesGenerateSmall( + _Inout_ PSYMCRYPT_RNG_AES_STATE pRngState, + _Out_writes_( cbRandom ) PBYTE pbRandom, + SIZE_T cbRandom, + _In_reads_opt_( cbAdditionalInput ) PCBYTE pbAdditionalInput, + SIZE_T cbAdditionalInput ) +// +// This is the Generate function of our SP800-90 compliant implementation. +// It follows the method specified in SP800-90A 10.2.1.5.2 +// +{ + SYMCRYPT_AES_EXPANDED_KEY aesKey; + SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_AES_BLOCK_SIZE]; + SYMCRYPT_ALIGN BYTE abSeed[SYMCRYPT_RNG_AES_INTERNAL_SEED_SIZE]; + + // + // SP 800-90 9.3.1 requires a check on the length of the request. + // + if( cbRandom > SYMCRYPT_RNG_AES_MAX_REQUEST_SIZE ) + { + return SYMCRYPT_WRONG_DATA_SIZE; + } + // + // The requestCounter test is useless as it can never happen. (It would require + // 2^48 calls to this function to trigger this error.) + // Unfortunately, SP800-90 section 11 requires a test of this error, so we have + // to implement the error. 
+ // + if( pRngState->requestCounter > SYMCRYPT_RNG_AES_MAX_REQUESTS_PER_RESEED ) + { + return SYMCRYPT_FIPS_FAILURE; + } + + if( pbAdditionalInput != NULL ) + { + // Update additional input using Derivation function + SymCryptRngAesDf( pbAdditionalInput, cbAdditionalInput, abSeed ); + pbAdditionalInput = &abSeed[0]; + + // Update state with modified additional input + SymCryptRngAesUpdate( pRngState, pbAdditionalInput, NULL ); + } + + SymCryptAesExpandKeyEncryptOnly( &aesKey, pRngState->keyAndV, SYMCRYPT_RNG_AES_KEY_SIZE ); + + if( cbRandom >= SYMCRYPT_AES_BLOCK_SIZE ) + { + SIZE_T wholeBlocks = cbRandom & ~(SYMCRYPT_AES_BLOCK_SIZE - 1); + SymCryptRngAesGenerateBlocks( &aesKey, + &pRngState->keyAndV[ SYMCRYPT_RNG_AES_KEY_SIZE], + pbRandom, + wholeBlocks ); + if( pRngState->fips140_2Check ) + { + SymCryptRngAesCheckBlocksNotIdentical( pRngState->previousBlock, pbRandom, wholeBlocks ); + } + pbRandom += wholeBlocks; + cbRandom -= wholeBlocks; + } + + if( cbRandom > 0 ) + { + SYMCRYPT_ASSERT( cbRandom < SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptRngAesGenerateBlocks( &aesKey, + &pRngState->keyAndV[ SYMCRYPT_RNG_AES_KEY_SIZE], + buf, + sizeof( buf ) ); + if( pRngState->fips140_2Check ) + { + SymCryptRngAesCheckBlocksNotIdentical( pRngState->previousBlock, buf, sizeof( buf ) ); + } + + memcpy( pbRandom, buf, cbRandom ); + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + } + + SymCryptRngAesUpdate( pRngState, pbAdditionalInput, &aesKey ); + + ++pRngState->requestCounter; + + SymCryptWipeKnownSize( &aesKey, sizeof( aesKey ) ); + SymCryptWipeKnownSize( abSeed, sizeof( abSeed ) ); + + return SYMCRYPT_NO_ERROR; +} + + +_Use_decl_annotations_ +SYMCRYPT_NOINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesInstantiate( PSYMCRYPT_RNG_AES_STATE pRngState, + PCBYTE pcbSeedMaterial, + SIZE_T cbSeedMaterial ) +// +// This function creates a new SP 800-90 AES_CTR_DRBG instance. +// Our code is structured differently from what SP 800-90 assumes. 
+// At this point in time, the entropy has already been collected and it is +// passed to this function. Thus, there is no check for failing to get +// the entropy. If entropy collection fails, the caller of this function +// will generate an error. (Actually, we only choose to instantiate a FIPS-compliant +// SP 800-90 DRBG when we do have good entropy available, so there is never an +// error that we don't have the required entropy.) +// +{ + if( cbSeedMaterial < SYMCRYPT_RNG_AES_MIN_INSTANTIATE_SIZE ) + { + return SYMCRYPT_EXTERNAL_FAILURE; + } + + // + // Instantiation of a new state is identical to setting the state to zero + // and then performing a reseed with the same seed material. + // + // See SP 800-90 10.2.1.3.2 & 10.2.1.4.2 + // + SymCryptWipeKnownSize( pRngState, sizeof( *pRngState ) ); + + SYMCRYPT_SET_MAGIC( pRngState ); + + return SymCryptRngAesReseed( pRngState, pcbSeedMaterial, cbSeedMaterial ); +} + +_Use_decl_annotations_ +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptRngAesGenerate( PSYMCRYPT_RNG_AES_STATE pRngState, + PBYTE pbRandom, + SIZE_T cbRandom ) +// +// For FIPS compliance purposes, this is NOT the generate function of the DRBG. +// The generate function is SymCryptRngAesGenerateSmall. +// This is a wrapper around the generate function that supports larger output +// sizes, and handles any errors by making them fatal. 
+// +{ + SYMCRYPT_ERROR scError; + + SYMCRYPT_CHECK_MAGIC( pRngState ); + + while( cbRandom > SYMCRYPT_RNG_AES_MAX_REQUEST_SIZE ) + { + + scError = SymCryptRngAesGenerateSmall( pRngState, pbRandom, SYMCRYPT_RNG_AES_MAX_REQUEST_SIZE, NULL, 0 ); + if( scError != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'acdx' ); + } + pbRandom += SYMCRYPT_RNG_AES_MAX_REQUEST_SIZE; + cbRandom -= SYMCRYPT_RNG_AES_MAX_REQUEST_SIZE; + } + + if( cbRandom > 0 ) + { + scError = SymCryptRngAesGenerateSmall( pRngState, pbRandom, cbRandom, NULL, 0 ); + if( scError != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'acdx' ); + } + } +} + +_Use_decl_annotations_ +SYMCRYPT_NOINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesReseed( PSYMCRYPT_RNG_AES_STATE pRngState, + PCBYTE pcbSeedMaterial, + SIZE_T cbSeedMaterial ) +{ + SYMCRYPT_ALIGN BYTE abSeed[SYMCRYPT_RNG_AES_INTERNAL_SEED_SIZE]; + + SYMCRYPT_CHECK_MAGIC( pRngState ); + + // + // For a reseed, the minimum # bits is the security strength, or the key size. + // We retain the same maximum as that protects our own internal buffers. + // + if (cbSeedMaterial < SYMCRYPT_RNG_AES_MIN_RESEED_SIZE || + cbSeedMaterial > SYMCRYPT_RNG_AES_MAX_SEED_SIZE ) + { + return SYMCRYPT_EXTERNAL_FAILURE; // bug is external to SymCrypt (i.e. the caller) + } + + // + // We do not perform the FIPS-required reseed self-test here. + // Rather, we have a function that external callers can use to implement that test before + // calling this reseed function. + // This allows callers that are not interested in FIPS certification to skip the test. 
+ // + + SymCryptRngAesDf( pcbSeedMaterial, cbSeedMaterial, abSeed ); + + SymCryptRngAesUpdate( pRngState, abSeed, NULL ); + + pRngState->requestCounter = 1; + + SymCryptWipeKnownSize( abSeed, sizeof( abSeed ) ); + + return SYMCRYPT_NO_ERROR; +} + +_Use_decl_annotations_ +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptRngAesUninstantiate( PSYMCRYPT_RNG_AES_STATE pRngState ) +{ + SymCryptWipeKnownSize( pRngState, sizeof( *pRngState ) ); +} + +//////////////////////////////////////////////////////////////////////////// +// Self test + +// +// The test vector is from the NIST DRBG Test Vectors file +// +static const BYTE g_abInstantiateEntropyInputPlusNonce[] = +{ + // Entropy input + + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, + 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F, + 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, + 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F, + 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, + 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, + + // Nonce + 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, + 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F, + +}; + + +static const BYTE g_abReseedEntropy[] = +{ + + 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87, + 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F, + 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97, + 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F, + 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7, + 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF +}; + +static const BYTE g_abOutput1[ 32 ] = +{ + 0xD1,0xE9,0xC7,0x37,0xB6,0xEB,0xAE,0xD7, + 0x65,0xA0,0xD4,0xE4,0xC6,0xEA,0xEB,0xE2, + 0x67,0xF5,0xE9,0x19,0x36,0x80,0xFD,0xFF, + 0xA6,0x2F,0x48,0x65,0xB3,0xF0,0x09,0xEC, +}; + +static const BYTE g_expectedStateAfterInstantiate[ SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ] = +{ + //key + 0x8C,0x10,0xB6,0x58,0x44,0x0C,0x71,0x35, + 0x64,0x9D,0xC7,0x7B,0xE6,0xE5,0x75,0xCE, + 0x87,0xE7,0x48,0x90,0x83,0x9B,0x89,0x59, + 0x14,0x17,0xAF,0xAD,0x14,0xB2,0x26,0xD5, + //V + 0xB4,0x03,0x6B,0x1D,0xBA,0x04,0x3A,0xE6, + 0x55,0xAC,0xD6,0x46,0xEC,0x5A,0xD3,0x5C, +}; + +static const BYTE 
g_expectedStateAfterReseed[ SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ] = +{ + //key + 0x17,0x98,0xC0,0xDF,0x09,0x69,0x6A,0x46, + 0x19,0x46,0xFE,0x6D,0x68,0x7D,0x8C,0xC8, + 0x3F,0xEE,0xF1,0x22,0xF3,0xBB,0xC5,0xF2, + 0x9D,0xAC,0x85,0x10,0xF3,0x4A,0xF0,0x15, + //V + 0x0B,0xF3,0x34,0x4D,0xF5,0x29,0x27,0x6B, + 0x0D,0x5B,0xBC,0x83,0x9B,0xD3,0x65,0x6A, +}; + +static const BYTE g_expectedStateAfterGenerate[ SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ] = +{ + //key + 0x28, 0xbc, 0x65, 0xa8, 0x6a, 0xb7, 0xc7, 0x4e, 0xdf, 0x4b, 0xb8, 0x72, 0x87, 0xd3, 0x4f, 0xbb, + 0x8d, 0x6f, 0x16, 0xd7, 0xb9, 0x1b, 0x6a, 0xbb, 0xee, 0x7b, 0x88, 0x86, 0x5b, 0x0f, 0xc7, 0xbd, + //V + 0xb7, 0x46, 0x11, 0xf3, 0x92, 0x95, 0xa6, 0x25, 0x7c, 0x39, 0x98, 0x4c, 0x9c, 0x09, 0x9b, 0x30, +}; + + +VOID +SYMCRYPT_CALL +SymCryptRngAesTestInstantiate( PSYMCRYPT_RNG_AES_STATE pRngState ) +// +// Test the Instantiate function on the passed instance. Leave it +// in the initialized state for the test vector. +// +{ + SYMCRYPT_ERROR scError; + // + // First test the error handling + // +#pragma prefast( suppress: 26060 6309 28020, "Deliberate test of invalid parameter"); + scError = SymCryptRngAesInstantiate( pRngState, NULL, 327 ); + if( scError == SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'aci1' ); + } + + + scError = SymCryptRngAesInstantiate( pRngState, + g_abInstantiateEntropyInputPlusNonce, + sizeof( g_abInstantiateEntropyInputPlusNonce ) + ); + + SymCryptInjectError( pRngState->keyAndV, SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ); + + if ( scError != SYMCRYPT_NO_ERROR || + 0 != memcmp( pRngState->keyAndV, + g_expectedStateAfterInstantiate, + SYMCRYPT_RNG_AES_KEY_AND_V_SIZE )) + { + SymCryptFatal( 'aci2' ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptRngAesTestReseed( PSYMCRYPT_RNG_AES_STATE pRngState ) +{ + SYMCRYPT_ERROR scError; + + // + // Set the state to a known state + // + SYMCRYPT_SET_MAGIC( pRngState ); + memcpy( pRngState->keyAndV, g_expectedStateAfterInstantiate, SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ); + pRngState->requestCounter = 
7; + pRngState->fips140_2Check = FALSE; + + // + // Test error handling + // +#pragma prefast(suppress: 26060 6309 28020, "Deliberate test of invalid parameter"); + scError = SymCryptRngAesReseed( pRngState, NULL, 597 ); + if( scError == SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'acr1' ); + } + + scError = SymCryptRngAesReseed( pRngState, g_abReseedEntropy, sizeof( g_abReseedEntropy ) ); + + SymCryptInjectError( pRngState->keyAndV, SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ); + + if ( scError != SYMCRYPT_NO_ERROR || + 0 != memcmp( pRngState->keyAndV, + g_expectedStateAfterReseed, + SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ) ) + { + SymCryptFatal( 'acr2' ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptRngAesTestGenerate( PSYMCRYPT_RNG_AES_STATE pRngState ) +{ + BYTE abOutput[2*SYMCRYPT_AES_BLOCK_SIZE]; + SYMCRYPT_ERROR scError; + + // + // Set the state to a known value + // + SYMCRYPT_SET_MAGIC( pRngState ); + memcpy( pRngState->keyAndV, g_expectedStateAfterReseed, SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ); + pRngState->requestCounter = 7; + pRngState->fips140_2Check = FALSE; + + // + // Test the error handling + // - Too many requests since last reseed + // - Too many bytes in request + // + + pRngState->requestCounter = SYMCRYPT_RNG_AES_MAX_REQUESTS_PER_RESEED + 1; + scError = SymCryptRngAesGenerateSmall( pRngState, abOutput, sizeof( g_abOutput1 ), NULL, 0 ); + + if( scError == SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'acg1' ); + } + pRngState->requestCounter = 7; + +#pragma prefast( suppress: 6202 26000, "buffer size of cbOutput is purposely incorrect"); + scError = SymCryptRngAesGenerateSmall( pRngState, abOutput, SYMCRYPT_RNG_AES_MAX_REQUEST_SIZE + 1, NULL, 0 ); + + if( scError == SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'acg2' ); + } + + // + // Now test for correct output data. 
+ // + scError = SymCryptRngAesGenerateSmall( pRngState, abOutput, sizeof( g_abOutput1 ), NULL, 0 ); + + SymCryptInjectError( abOutput, sizeof( abOutput ) ); + + if( scError != SYMCRYPT_NO_ERROR || memcmp( abOutput, g_abOutput1, sizeof( g_abOutput1 ) ) != 0 ) + { + SymCryptFatal( 'acg3' ); + } + + // + // And test for the correct resulting state + // + SymCryptInjectError( pRngState->keyAndV, SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ); + + if ( 0 != memcmp( pRngState->keyAndV, + g_expectedStateAfterGenerate, + SYMCRYPT_RNG_AES_KEY_AND_V_SIZE ) ) + { + SymCryptFatal( 'acg4' ); + } +} + + +VOID +SYMCRYPT_CALL +SymCryptRngAesTestUninstantiate( PSYMCRYPT_RNG_AES_STATE pRngState ) +{ + const SIZE_T * p = (const SIZE_T *) pRngState; + SIZE_T t; + SIZE_T i; + + C_ASSERT( sizeof( *pRngState ) % sizeof( SIZE_T ) == 0 ); // This is true on all our platforms. + + SYMCRYPT_CHECK_MAGIC( pRngState ); + + SymCryptRngAesUninstantiate( pRngState ); + + t = 0; + for( i=0; i< sizeof( *pRngState ) / sizeof( SIZE_T ); i ++ ) + { + t |= p[i]; + } + + if( t != 0 ) + { + SymCryptFatal( 'acdu' ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptRngAesInstantiateSelftest(void) +{ + SYMCRYPT_RNG_AES_STATE rng; + + SymCryptRngAesTestInstantiate( &rng ); + + // + // Uninstantiate has to be tested whenever another function is tested. + // + SymCryptRngAesTestUninstantiate( &rng ); +} + +VOID +SYMCRYPT_CALL +SymCryptRngAesReseedSelftest(void) +{ + SYMCRYPT_RNG_AES_STATE rng; + + SymCryptRngAesTestReseed( &rng ); + + // + // Uninstantiate has to be tested whenever another function is tested. + // + SymCryptRngAesTestUninstantiate( &rng ); +} + +VOID +SYMCRYPT_CALL +SymCryptRngAesGenerateSelftest(void) +{ + SYMCRYPT_RNG_AES_STATE rng; + + SymCryptRngAesTestGenerate( &rng ); + + // + // Uninstantiate has to be tested whenever another function is tested. 
+ // + SymCryptRngAesTestUninstantiate( &rng ); +} + + +/////////////////////////////////////////////////////////////////// +// AES-CTR_DRBG with FIPS 140-2 continuous self-test +// + + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesFips140_2Instantiate( PSYMCRYPT_RNG_AES_FIPS140_2_STATE pRngState, + PCBYTE pcbSeedMaterial, + SIZE_T cbSeedMaterial ) +{ + SYMCRYPT_ERROR scError; + + scError = SymCryptRngAesInstantiate( &pRngState->rng, pcbSeedMaterial, cbSeedMaterial ); + + if( scError == SYMCRYPT_NO_ERROR ) + { + // + // Generate the first block of output and store it so that we can compare future blocks. + // + SymCryptRngAesGenerate( &pRngState->rng, pRngState->rng.previousBlock, sizeof( pRngState->rng.previousBlock ) ); + pRngState->rng.fips140_2Check = TRUE; + } + + return scError; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptRngAesFips140_2Generate( PSYMCRYPT_RNG_AES_FIPS140_2_STATE pRngState, + PBYTE pbRandom, + SIZE_T cbRandom ) +{ + SymCryptRngAesGenerate( &pRngState->rng, pbRandom, cbRandom ); +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRngAesFips140_2Reseed( PSYMCRYPT_RNG_AES_FIPS140_2_STATE pRngState, + PCBYTE pcbSeedMaterial, + SIZE_T cbSeedMaterial ) +{ + return SymCryptRngAesReseed( &pRngState->rng, pcbSeedMaterial, cbSeedMaterial ); +} + + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptRngAesFips140_2Uninstantiate( PSYMCRYPT_RNG_AES_FIPS140_2_STATE pRngState ) +{ + SymCryptRngAesUninstantiate( &pRngState->rng ); +} diff --git a/libs/symcrypt/lib/aescmac.c b/libs/symcrypt/lib/aescmac.c new file mode 100644 index 00000000000..7af0e31bcd9 --- /dev/null +++ b/libs/symcrypt/lib/aescmac.c @@ -0,0 +1,258 @@ +// +// aescmac.c Implementation of the AES-CMAC block cipher mode +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#include "precomp.h" +// AES-CMAC algorithm descriptor: the four entry points followed by the expanded-key size, the state size and the result size. NOTE(review): the meaning of the trailing NULL and 0 fields comes from the SYMCRYPT_MAC declaration, which is not visible here. +const SYMCRYPT_MAC SymCryptAesCmacAlgorithm_fast = { + SymCryptAesCmacExpandKey, + SymCryptAesCmacInit, + SymCryptAesCmacAppend, + SymCryptAesCmacResult, + sizeof(SYMCRYPT_AES_CMAC_EXPANDED_KEY), + sizeof(SYMCRYPT_AES_CMAC_STATE), + SYMCRYPT_AES_CMAC_RESULT_SIZE, + NULL, + 0, +}; + +const PCSYMCRYPT_MAC SymCryptAesCmacAlgorithm = &SymCryptAesCmacAlgorithm_fast; +/* Shifts the 16-byte buffer left by one bit, treating buf[0] as the most significant byte; when a 1 bit is shifted out of buf[0], 0x87 is XORed into buf[15]. The mask (0 - carry) selects that XOR without a branch. */ +VOID +SYMCRYPT_CALL +SymCryptCmacMunge( + _Inout_updates_bytes_(SYMCRYPT_AES_BLOCK_SIZE) BYTE buf[SYMCRYPT_AES_BLOCK_SIZE] ) +{ + SIZE_T carry = 0; + SIZE_T tmp; + int i; + + for( i=15; i>=0; i-- ) + { + tmp = buf[i]; + buf[i] = ((tmp << 1) | carry) & 0xff; + carry = tmp >> 7; + } + + buf[15] ^= (0 - carry) & 0x87; // This is the R_128 value from SP 800-38B 5.3 +} +/* Expands the AES key into pExpandedKey->aesKey and derives the two CMAC subkeys: an all-zero block is encrypted, then SymCryptCmacMunge is applied once to produce K1 and a second time to produce K2. If SymCryptAesExpandKey fails, its error is returned unchanged and K1, K2 and the magic value are not set. */ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesCmacExpandKey( + _Out_ PSYMCRYPT_AES_CMAC_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_AES_BLOCK_SIZE]; + + scError = SymCryptAesExpandKey( &pExpandedKey->aesKey, pbKey, cbKey ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + + SymCryptAesEncrypt( &pExpandedKey->aesKey, buf, buf ); + + SymCryptCmacMunge( buf ); + memcpy( &pExpandedKey->K1, buf, sizeof( buf ) ); + SymCryptCmacMunge( buf ); + memcpy( &pExpandedKey->K2, buf, sizeof( buf ) ); + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + + SYMCRYPT_SET_MAGIC( pExpandedKey ); + +cleanup: + + return scError; +} + +/* Copies an expanded CMAC key: the AES key via SymCryptAesKeyCopy, then K1 and K2, and finally sets the magic value on the destination. */ +VOID +SYMCRYPT_CALL +SymCryptAesCmacKeyCopy( + _In_ PCSYMCRYPT_AES_CMAC_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_AES_CMAC_EXPANDED_KEY pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + SymCryptAesKeyCopy( &pSrc->aesKey, &pDst->aesKey ); + memcpy( pDst->K1, pSrc->K1, sizeof( pDst->K1 ) ); + memcpy( pDst->K2, pSrc->K2, sizeof( pDst->K2 ) ); + SYMCRYPT_SET_MAGIC( pDst ); +} + +/* One-shot CMAC: runs Init, Append and Result on a local state, writes the MAC to pbResult, and wipes the local state before returning. */ +VOID +SYMCRYPT_CALL +SymCryptAesCmac( + _In_ 
PSYMCRYPT_AES_CMAC_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_AES_CMAC_RESULT_SIZE ) PBYTE pbResult ) +{ + SYMCRYPT_AES_CMAC_STATE state; + + SymCryptAesCmacInit( &state, pExpandedKey ); + SymCryptAesCmacAppend( &state, pbData, cbData ); + SymCryptAesCmacResult( &state, pbResult ); + + SymCryptWipeKnownSize( &state, sizeof( state ) ); +} + +/* Copies the CMAC state from pSrc to pDst. The copy keeps the key pointer of pSrc when pExpandedKey is NULL; otherwise it is attached to pExpandedKey. */ +VOID +SYMCRYPT_CALL +SymCryptAesCmacStateCopy( + _In_ PCSYMCRYPT_AES_CMAC_STATE pSrc, + _In_opt_ PCSYMCRYPT_AES_CMAC_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_AES_CMAC_STATE pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + *pDst = *pSrc; + + if( pExpandedKey == NULL ) + { + SYMCRYPT_CHECK_MAGIC( pSrc->pKey ); + pDst->pKey = pSrc->pKey; + } + else + { + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + pDst->pKey = pExpandedKey; + } + + SYMCRYPT_SET_MAGIC( pDst ); +} +/* Starts a new CMAC computation: marks the buffer as empty, wipes the chaining value and binds the state to pExpandedKey. */ +VOID +SYMCRYPT_CALL +SymCryptAesCmacInit( + _Out_ PSYMCRYPT_AES_CMAC_STATE pState, + _In_ PCSYMCRYPT_AES_CMAC_EXPANDED_KEY pExpandedKey) +{ + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + pState->bytesInBuf = 0; + SymCryptWipeKnownSize( pState->chain, sizeof( pState->chain ) ); + pState->pKey = pExpandedKey; + + SYMCRYPT_SET_MAGIC( pState ); +} +/* Adds cbData bytes to the MAC computation. Complete blocks are passed to SymCryptAesCbcMac; after any non-empty append, between 1 and 16 of the most recent bytes remain in pState->buf so that SymCryptAesCmacResult can treat them as the final block. */ +VOID +SYMCRYPT_CALL +SymCryptAesCmacAppend( + _Inout_ PSYMCRYPT_AES_CMAC_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + + SYMCRYPT_CHECK_MAGIC( pState ); + + if( pState->bytesInBuf != 0 ) + { + SIZE_T freeInBuf = SYMCRYPT_AES_BLOCK_SIZE - pState->bytesInBuf; + SYMCRYPT_ASSERT( freeInBuf < SYMCRYPT_AES_BLOCK_SIZE ); + + if( cbData <= freeInBuf ) + { + // Do nothing. 
+ // the data will be copied into the buf at the end of this function + // + } + else + { + memcpy( &pState->buf[pState->bytesInBuf], pbData, freeInBuf ); + pbData += freeInBuf; + cbData -= freeInBuf; + SymCryptAesCbcMac( &pState->pKey->aesKey, &pState->chain[0], &pState->buf[0], SYMCRYPT_AES_BLOCK_SIZE ); + pState->bytesInBuf = 0; + } + } + + // + // At this point, either pState->bytesInBuf == 0, or it is !=0 but cbData is small enough that all the + // data will still fit in the buffer without further processing. + // + // Whole blocks are processed straight from the input, but (cbData-1) rounded down to a block multiple always leaves 1 to 16 bytes unprocessed, so the last block stays available for SymCryptAesCmacResult. + if( cbData > SYMCRYPT_AES_BLOCK_SIZE ) + { + SIZE_T bytesToDo = (cbData-1) & ~(SIZE_T)(SYMCRYPT_AES_BLOCK_SIZE - 1); + SymCryptAesCbcMac( &pState->pKey->aesKey, &pState->chain[0], pbData, bytesToDo ); + pbData += bytesToDo; + cbData -= bytesToDo; + } + + if( cbData > 0 ) + { + memcpy( &pState->buf[pState->bytesInBuf], pbData, cbData ); + pState->bytesInBuf += cbData; + } +} + +/* Finishes the MAC: pads and masks the buffered final block, runs it through SymCryptAesCbcMac, copies the chaining value to pbResult, and then resets bytesInBuf and wipes chain and buf. */ +VOID +SYMCRYPT_CALL +SymCryptAesCmacResult( + _Inout_ PSYMCRYPT_AES_CMAC_STATE pState, + _Out_writes_( SYMCRYPT_AES_CMAC_RESULT_SIZE ) PBYTE pbResult ) +{ + SYMCRYPT_CHECK_MAGIC( pState ); + // A buffer holding fewer than 16 bytes is padded with 0x80 followed by zero bytes and XORed with K2; a full 16-byte buffer is XORed with K1 unpadded. + if( pState->bytesInBuf < SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptWipe( &pState->buf[pState->bytesInBuf + 1], SYMCRYPT_AES_BLOCK_SIZE - pState->bytesInBuf - 1 ); + pState->buf[pState->bytesInBuf] = 0x80; + SymCryptXorBytes( &pState->buf[0], &pState->pKey->K2[0], &pState->buf[0], SYMCRYPT_AES_BLOCK_SIZE ); + } + else + { + SymCryptXorBytes( &pState->buf[0], &pState->pKey->K1[0], &pState->buf[0], SYMCRYPT_AES_BLOCK_SIZE ); + } + + SymCryptAesCbcMac( &pState->pKey->aesKey, &pState->chain[0], &pState->buf[0], SYMCRYPT_AES_BLOCK_SIZE ); + memcpy( pbResult, &pState->chain[0], SYMCRYPT_AES_BLOCK_SIZE ); + + // + // Put the state back in the original starting state, + // and wipe any traces of the data. 
+ // + pState->bytesInBuf = 0; + SymCryptWipeKnownSize( pState->chain, sizeof( pState->chain ) ); + SymCryptWipeKnownSize( pState->buf, sizeof( pState->buf ) ); +} + + +// Expected MAC value checked by SymCryptAesCmacSelftest. +static const BYTE aesCmacKat[SYMCRYPT_AES_CMAC_RESULT_SIZE] = { + 0x0a, 0x54, 0xa6, 0xa4, 0x25, 0xd4, 0x84, 0x38, 0xc3, 0xf8, 0xbb, 0xe0, 0x9b, 0xf9, 0x44, 0xcc, +}; + +/* Known-answer self-test: MACs SymCryptTestMsg3 under a key made of the first 16 bytes of SymCryptTestKey32 and calls SymCryptFatal if the result differs from aesCmacKat. */ +VOID +SYMCRYPT_CALL +SymCryptAesCmacSelftest(void) +{ + SYMCRYPT_AES_CMAC_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_AES_CMAC_RESULT_SIZE]; + + SymCryptAesCmacExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptAesCmac( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + if( memcmp( res, aesCmacKat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh5' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/aeskw.c b/libs/symcrypt/lib/aeskw.c new file mode 100644 index 00000000000..c16ec6becf5 --- /dev/null +++ b/libs/symcrypt/lib/aeskw.c @@ -0,0 +1,457 @@ +// +// aeskw.c Implementation of the AES-KW(P) block cipher modes +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// The KW and KWP modes have inherently terrible performance characteristics from how they are +// defined. Notably, they require a serial chain of AES block operations 12x longer than an +// equivalent AES-CBC encryption (which is already not a favored mode just because of the serial +// nature). +// Additionally the intermediate state of AES-KW and AES-KWP must be of a size proportional to +// the plaintext / ciphertext, rather than fitting into some constant-sized state. +// +// The current strategy for intermediate state handling is to allocate an internal buffer for +// the state. 
We expect that the caller does not care too much about performance if they are using +// these modes, so the overhead of an allocation per operation should not be a problem. +// +// While it is possible to expose an API surface which uses the destination buffer as a scratch +// buffer to store intermediate state, this would break the read-once/write-once rule, making the +// API surface brittle to misuse if the caller is encrypting to memory that may be in a different +// security domain (i.e. kernel caller encrypting a secret directly into memory which is mapped to +// user mode). +// If we need to expose a non-allocating version, we can introduce a lower-level API where the +// caller provides an appropriately sized scratch buffer, but we will cross that bridge if it is +// required. +// + +#include "precomp.h" + +const UINT64 SymCryptAesKwDefaultICV = 0xA6A6A6A6A6A6A6A6; +const UINT32 SymCryptAesKwpDefaultICV = 0xA65959A6; +#define SYMCRYPT_AES_SEMIBLOCK_SIZE (SYMCRYPT_AES_BLOCK_SIZE / 2) + +const SIZE_T SymCryptAesKWMinPlaintextLen = 16u; // 2*SYMCRYPT_AES_SEMIBLOCK_SIZE +const SIZE_T SymCryptAesKWMaxPlaintextLen = (1u<<31)-8; +const SIZE_T SymCryptAesKWMinCiphertextLen = 24u; // 3*SYMCRYPT_AES_SEMIBLOCK_SIZE +const SIZE_T SymCryptAesKWMaxCiphertextLen = (1u<<31); + +const SIZE_T SymCryptAesKWPMinPlaintextLen = 1u; +const SIZE_T SymCryptAesKWPMaxPlaintextLen = (1u<<31)-8; +const SIZE_T SymCryptAesKWPMinCiphertextLen = 16u; +const SIZE_T SymCryptAesKWPMaxCiphertextLen = (1u<<31); + +// +// This function corresponds to algorithm W(S) from section 6.1 of SP 800-38F +// +// We perform this algorithm destructively in place, reading and writing to the same location +// multiple times +// +static +VOID +SYMCRYPT_CALL +SymCryptAesKwxInternalWrap( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_bytes_(cbBuf) PBYTE pbBuf, + UINT32 cbBuf ) +{ + SYMCRYPT_ALIGN BYTE activeBlock[SYMCRYPT_AES_BLOCK_SIZE]; + const UINT32 nSemiBlocks = cbBuf / 
SYMCRYPT_AES_SEMIBLOCK_SIZE; // n per SP 800-38F + UINT64 encryptionIdx = 1; // t per SP 800-38F + UINT64 lowHalfTemp = 0; + + SYMCRYPT_ASSERT((cbBuf & (SYMCRYPT_AES_SEMIBLOCK_SIZE-1)) == 0); + SYMCRYPT_ASSERT(cbBuf >= SymCryptAesKWMinCiphertextLen); + SYMCRYPT_ASSERT(cbBuf <= SymCryptAesKWMaxCiphertextLen); + + // Special case for first encryption + // Initialize the low half of active block with the first semi-block of input + memcpy( activeBlock, pbBuf, SYMCRYPT_AES_SEMIBLOCK_SIZE); + + for( UINT32 outerLoopCnt = 0; outerLoopCnt < 6; outerLoopCnt++ ) + { + for( UINT32 innerLoopCnt = 1; innerLoopCnt < nSemiBlocks; innerLoopCnt++ ) + { + SIZE_T bufOffset = innerLoopCnt*SYMCRYPT_AES_SEMIBLOCK_SIZE; + + // Initialize the high half of active block to semi-block from buf + memcpy( activeBlock+SYMCRYPT_AES_SEMIBLOCK_SIZE, pbBuf+bufOffset, SYMCRYPT_AES_SEMIBLOCK_SIZE); + + // Encrypt activeBlock in place + SymCryptAesEncrypt( pExpandedKey, activeBlock, activeBlock ); + + // Store the high half of result back to semi-block from buf + memcpy( pbBuf+bufOffset, activeBlock+SYMCRYPT_AES_SEMIBLOCK_SIZE, SYMCRYPT_AES_SEMIBLOCK_SIZE ); + + // Use the low half of the result and the next encryptionIdx to + // initialize the low half of the next encryption + lowHalfTemp = SYMCRYPT_LOAD_LSBFIRST64( activeBlock ); + lowHalfTemp ^= SYMCRYPT_BSWAP64( encryptionIdx ); + SYMCRYPT_STORE_LSBFIRST64( activeBlock, lowHalfTemp ); + + // Update encryptionIdx + encryptionIdx++; + } + } + + SYMCRYPT_ASSERT( (encryptionIdx-1) == (nSemiBlocks-1)*6 ); + + // Special case for last encryption + // Store the final low half of encryption as the first semi-block of output + SYMCRYPT_STORE_LSBFIRST64( pbBuf, lowHalfTemp ); + + SymCryptWipeKnownSize( activeBlock, sizeof(activeBlock) ); +} + +// +// This function corresponds to algorithm W^-1(S) from section 6.1 of SP 800-38F +// +// We perform this algorithm destructively in place, reading and writing to the same location +// multiple times +// +static 
+VOID +SYMCRYPT_CALL +SymCryptAesKwxInternalUnwrap( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_bytes_(cbBuf) PBYTE pbBuf, + UINT32 cbBuf ) +{ + SYMCRYPT_ALIGN BYTE activeBlock[SYMCRYPT_AES_BLOCK_SIZE]; + const UINT32 nSemiBlocks = cbBuf / SYMCRYPT_AES_SEMIBLOCK_SIZE; // n per SP 800-38F + UINT64 decryptionIdx = 6*(nSemiBlocks-1); // t per SP 800-38F + UINT64 lowHalfTemp = 0; + + SYMCRYPT_ASSERT((cbBuf & (SYMCRYPT_AES_SEMIBLOCK_SIZE-1)) == 0); + SYMCRYPT_ASSERT(cbBuf >= SymCryptAesKWMinCiphertextLen); + SYMCRYPT_ASSERT(cbBuf <= SymCryptAesKWMaxCiphertextLen); + + // Special case for first decryption + // Initialize the low half temporary with the first semi-block of input + lowHalfTemp = SYMCRYPT_LOAD_LSBFIRST64( pbBuf ); + + for( UINT32 outerLoopCnt = 0; outerLoopCnt < 6; outerLoopCnt++ ) + { + for( UINT32 innerLoopCnt = nSemiBlocks-1; innerLoopCnt > 0; innerLoopCnt-- ) + { + SIZE_T bufOffset = innerLoopCnt*SYMCRYPT_AES_SEMIBLOCK_SIZE; + + // Update low half with decryptionIdx and store to low half of active block + lowHalfTemp ^= SYMCRYPT_BSWAP64( decryptionIdx ); + SYMCRYPT_STORE_LSBFIRST64( activeBlock, lowHalfTemp ); + + // Initialize the high half of active block to semi-block from buf + memcpy( activeBlock+SYMCRYPT_AES_SEMIBLOCK_SIZE, pbBuf+bufOffset, SYMCRYPT_AES_SEMIBLOCK_SIZE); + + // Decrypt activeBlock in place + SymCryptAesDecrypt( pExpandedKey, activeBlock, activeBlock ); + + // Store the high half of result back to semi-block from buf + memcpy( pbBuf+bufOffset, activeBlock+SYMCRYPT_AES_SEMIBLOCK_SIZE, SYMCRYPT_AES_SEMIBLOCK_SIZE ); + + // Update decryptionIdx + decryptionIdx--; + + // Use the low half of the result to set the low half temporary + lowHalfTemp = SYMCRYPT_LOAD_LSBFIRST64( activeBlock ); + } + } + + SYMCRYPT_ASSERT( decryptionIdx == 0 ); + + // Special case for last decryption + // Store the final low half of decryption as the first semi-block of output + SYMCRYPT_STORE_LSBFIRST64( pbBuf, lowHalfTemp ); + + 
SymCryptWipeKnownSize( activeBlock, sizeof(activeBlock) ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesKwEncrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T* pcbResult ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + UINT32 cbScratch = 0; + + if( (cbSrc < SymCryptAesKWMinPlaintextLen) || + (cbSrc > SymCryptAesKWMaxPlaintextLen) || + ((cbSrc & (SYMCRYPT_AES_SEMIBLOCK_SIZE-1)) != 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbScratch = ((UINT32) cbSrc)+SYMCRYPT_AES_SEMIBLOCK_SIZE; + if( cbDst < cbScratch ) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if( pbScratch == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // set up input buffer as ICV1 || P + SYMCRYPT_STORE_LSBFIRST64( pbScratch, SymCryptAesKwDefaultICV ); + memcpy( pbScratch+8, pbSrc, cbSrc ); + + // encrypt input buffer in place + SymCryptAesKwxInternalWrap( pExpandedKey, pbScratch, cbScratch ); + + // copy encrypted buffer to output + memcpy( pbDst, pbScratch, cbScratch ); + *pcbResult = cbScratch; + +cleanup: + if( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesKwDecrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T* pcbResult ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + UINT32 cbScratch = 0; + + if( (cbSrc < SymCryptAesKWMinCiphertextLen) || + (cbSrc > SymCryptAesKWMaxCiphertextLen) || + ((cbSrc & (SYMCRYPT_AES_SEMIBLOCK_SIZE-1)) != 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbScratch = 
(UINT32) cbSrc; + if( cbDst < cbScratch-SYMCRYPT_AES_SEMIBLOCK_SIZE ) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if( pbScratch == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // set up input buffer as C + memcpy( pbScratch, pbSrc, cbSrc ); + + // decrypt input buffer in place + SymCryptAesKwxInternalUnwrap( pExpandedKey, pbScratch, cbScratch ); + + // check first semi-block has the expected value + if( SYMCRYPT_LOAD_LSBFIRST64( pbScratch ) != SymCryptAesKwDefaultICV ) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; + goto cleanup; + } + + // copy decrypted buffer to output + memcpy( pbDst, pbScratch+SYMCRYPT_AES_SEMIBLOCK_SIZE, cbScratch-SYMCRYPT_AES_SEMIBLOCK_SIZE ); + *pcbResult = cbScratch-SYMCRYPT_AES_SEMIBLOCK_SIZE; + +cleanup: + if( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + return scError; + +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptAesKwpEncrypt( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_to_(cbDst, *pcbResult) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T* pcbResult ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + UINT32 cbScratch = 0; + UINT32 cbPad = 0; + + if( (cbSrc < SymCryptAesKWPMinPlaintextLen) || + (cbSrc > SymCryptAesKWPMaxPlaintextLen) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbPad = SYMCRYPT_AES_SEMIBLOCK_SIZE - ((UINT32) cbSrc & (SYMCRYPT_AES_SEMIBLOCK_SIZE-1)); + if( cbPad == SYMCRYPT_AES_SEMIBLOCK_SIZE ) + { + cbPad = 0; + } + + cbScratch = (UINT32) cbSrc + SYMCRYPT_AES_SEMIBLOCK_SIZE + cbPad; + if( cbDst < cbScratch ) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + + SYMCRYPT_ASSERT( cbScratch >= 16 ); + + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if( pbScratch == NULL ) + { + scError = 
//
// SymCryptAesKwpDecrypt
//
// AES-KWP unwrap of pbSrc into pbDst.
//
//  - cbSrc must lie in [SymCryptAesKWPMinCiphertextLen, SymCryptAesKWPMaxCiphertextLen] and
//    be a multiple of the semi-block size, otherwise SYMCRYPT_INVALID_ARGUMENT is returned.
//  - SYMCRYPT_BUFFER_TOO_SMALL is returned if cbDst cannot hold the plaintext. This is
//    checked twice: once up front against the smallest plaintext this ciphertext length
//    could carry, and once after verification against the actual plaintext length.
//  - SYMCRYPT_MEMORY_ALLOCATION_FAILURE is returned if the scratch allocation fails.
//  - SYMCRYPT_AUTHENTICATION_FAILURE is returned if the unwrapped header or padding is
//    malformed.
//
// On success the plaintext is written to pbDst and its length to *pcbResult.
// On any failure neither pbDst nor *pcbResult is written.
//
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptAesKwpDecrypt(
    _In_                                PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
    _In_reads_(cbSrc)                   PCBYTE                      pbSrc,
                                        SIZE_T                      cbSrc,
    _Out_writes_to_(cbDst, *pcbResult)  PBYTE                       pbDst,
                                        SIZE_T                      cbDst,
    _Out_                               SIZE_T*                     pcbResult )
{
    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
    PBYTE pbScratch = NULL;
    UINT32 cbScratch = 0;
    UINT32 cbPlaintext = 0;
    UINT32 cbPad = 0;
    UINT32 mVerificationError = 0;  // Mask indicating whether the decrypted buffer is malformed
    UINT32 mIsPlaintext = 0;        // Mask for plaintext bytes in the final semi-block

    if( (cbSrc < SymCryptAesKWPMinCiphertextLen) ||
        (cbSrc > SymCryptAesKWPMaxCiphertextLen) ||
        ((cbSrc & (SYMCRYPT_AES_SEMIBLOCK_SIZE-1)) != 0) )
    {
        scError = SYMCRYPT_INVALID_ARGUMENT;
        goto cleanup;
    }

    // Early size check that depends only on the ciphertext length: one semi-block of
    // header plus at most 7 bytes of padding are removed, so this is the smallest
    // plaintext that could come out.
    cbScratch = (UINT32) cbSrc;
    if( cbDst < cbScratch-SYMCRYPT_AES_SEMIBLOCK_SIZE-7 )
    {
        scError = SYMCRYPT_BUFFER_TOO_SMALL;
        goto cleanup;
    }

    pbScratch = SymCryptCallbackAlloc( cbScratch );
    if( pbScratch == NULL )
    {
        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
        goto cleanup;
    }

    // set up input buffer as C
    memcpy( pbScratch, pbSrc, cbSrc );

    // decrypt input buffer in place
    if( cbScratch == SYMCRYPT_AES_BLOCK_SIZE )
    {
        // special case for AES-KWP with small ciphertext
        SymCryptAesDecrypt( pExpandedKey, pbScratch, pbScratch );
    } else {
        SymCryptAesKwxInternalUnwrap( pExpandedKey, pbScratch, cbScratch );
    }

    // Check if the decrypted buffer is of an expected form
    //
    // All checks below OR their result into mVerificationError and none of them branches,
    // so the only data-dependent branch is the single test after the loop.

    // check bytes [0..3] are expected ICV
    mVerificationError |= SYMCRYPT_LOAD_LSBFIRST32( pbScratch ) ^ SymCryptAesKwpDefaultICV;

    // check bytes [4..7] are a valid plaintext length (i.e. computed cbPad in range [0,7])
    // Unsigned arithmetic: a plaintext length that is too large wraps cbPad to a huge
    // value, which also sets bits above the low three and is caught by the same mask.
    cbPlaintext = SYMCRYPT_LOAD_MSBFIRST32( pbScratch+4 );
    cbPad = (UINT32) cbSrc - cbPlaintext - SYMCRYPT_AES_SEMIBLOCK_SIZE;
    mVerificationError |= (cbPad & 0xfffffff8);

    // check that padding is all 0s
    // The loop starts at 1: a valid cbPad is at most 7, so byte 0 of the final semi-block
    // is always plaintext; an out-of-range cbPad has already been flagged above.
    // (cbPad&7) keeps the index arithmetic in range even when cbPad is invalid.
    // NOTE(review): SymCryptMask32LtU31 is defined elsewhere; presumably it yields an
    // all-ones mask when the first argument is less than the second, else zero - confirm.
    for( UINT32 i = 1; i<SYMCRYPT_AES_SEMIBLOCK_SIZE; i++ )
    {
        mIsPlaintext = SymCryptMask32LtU31(i, SYMCRYPT_AES_SEMIBLOCK_SIZE-(cbPad&7));
        mVerificationError |= ((UINT32) pbScratch[ cbScratch-SYMCRYPT_AES_SEMIBLOCK_SIZE+i ]) & ~mIsPlaintext;
    }

    // Now if there was any verification error, we fail
    if( mVerificationError != 0 )
    {
        scError = SYMCRYPT_AUTHENTICATION_FAILURE;
        goto cleanup;
    }

    // We are variable time w.r.t. the plaintext length on success
    if( cbDst < cbPlaintext )
    {
        scError = SYMCRYPT_BUFFER_TOO_SMALL;
        goto cleanup;
    }

    // copy decrypted buffer to output
    memcpy( pbDst, pbScratch+SYMCRYPT_AES_SEMIBLOCK_SIZE, cbPlaintext );
    *pcbResult = cbPlaintext;

cleanup:
    // The scratch buffer holds the unwrapped plaintext; wipe it on every path that
    // allocated it, including the failure paths.
    if( pbScratch != NULL )
    {
        SymCryptWipe( pbScratch, cbScratch );
        SymCryptCallbackFree( pbScratch );
    }
    return scError;

}
+ // + + for( i=0; i<cbToDo; i+= pBlockCipher->blockSize ) + { + (*pBlockCipher->encryptFunc)( pExpandedKey, pbSrc + i, pbDst + i ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptEcbDecrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SIZE_T i; + SIZE_T cbToDo = cbData & ~(pBlockCipher->blockSize - 1); + + if( pBlockCipher->ecbDecryptFunc != NULL ) + { + // + // Use optimized implementation if available + // + (*pBlockCipher->ecbDecryptFunc)( pExpandedKey, pbSrc, pbDst, cbData ); + return; + } + + for( i=0; i<cbToDo; i+= pBlockCipher->blockSize ) + { + (*pBlockCipher->decryptFunc)( pExpandedKey, pbSrc + i, pbDst + i ); + } +} + + +// +// SymCryptCbcEncrypt +// +// Generic CBC encryption routine for block ciphers. +// The following restrictions must be obeyed: +// - blockSize <= 32 and must be a power of 2 +// - cbData must be a multiple of the block size +// +VOID +SYMCRYPT_CALL +SymCryptCbcEncrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_MAX_BLOCK_SIZE]; + SIZE_T blockSize; + PCBYTE pbSrcEnd; + PCBYTE pSrc = pbSrc; + PBYTE pDst = pbDst; + + if( pBlockCipher->cbcEncryptFunc != NULL ) + { + // + // Use optimized implementation if available + // + (*pBlockCipher->cbcEncryptFunc)( pExpandedKey, pbChainingValue, pSrc, pDst, cbData ); + return; + } + + blockSize = pBlockCipher->blockSize; + + SYMCRYPT_ASSERT( blockSize <= SYMCRYPT_MAX_BLOCK_SIZE ); + + + // + // Compute the end of the data, rounding the size down to a multiple of the block size. + // + pbSrcEnd = &pbSrc[ cbData & ~(blockSize - 1) ]; + + // + // We keep the chaining state in a local buffer to enforce the read-once write-once rule. 
+ // + memcpy( buf, pbChainingValue, blockSize ); + while( pSrc < pbSrcEnd ) + { + SYMCRYPT_ASSERT( pSrc <= pbSrc + cbData - blockSize ); // help PreFast + SYMCRYPT_ASSERT( pDst <= pbDst + cbData - blockSize ); // help PreFast + SYMCRYPT_ASSERT( blockSize <= cbData ); // help PreFast + SymCryptXorBytes( pSrc, buf, buf, blockSize ); + (*pBlockCipher->encryptFunc)( pExpandedKey, buf, buf ); + memcpy( pDst, buf, blockSize ); + pSrc += blockSize; + pDst += blockSize; + } + + memcpy( pbChainingValue, buf, blockSize ); + + SymCryptWipeKnownSize( buf, sizeof( buf )); +} + +// +// SymCryptCbcDecrypt +// +// Generic CBC decryption routine for block ciphers. +// The following restrictions must be obeyed: +// - blockSize <= 32 and must be a power of 2 +// - cbData must be a multiple of the block size +// +VOID +SYMCRYPT_CALL +SymCryptCbcDecrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SYMCRYPT_ALIGN BYTE buf[3 * SYMCRYPT_MAX_BLOCK_SIZE]; + PBYTE chain = &buf[0]; + PBYTE ciphertext = &buf[SYMCRYPT_MAX_BLOCK_SIZE]; + PBYTE tmp = &buf[2*SYMCRYPT_MAX_BLOCK_SIZE]; + + SIZE_T blockSize; + PCBYTE pbSrcEnd; + + if( pBlockCipher->cbcDecryptFunc != NULL ) + { + (*pBlockCipher->cbcDecryptFunc)( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + return; + } + + blockSize = pBlockCipher->blockSize; + SYMCRYPT_ASSERT( blockSize <= SYMCRYPT_MAX_BLOCK_SIZE ); + + // + // Compute the end of the data, rounding the size down to a multiple of the block size. 
+ // + pbSrcEnd = &pbSrc[ cbData & ~(blockSize-1) ]; + +#pragma warning(suppress: 22105) + memcpy( chain, pbChainingValue, blockSize ); + + // + // Loop structured to obey the read-once/write-once rule + // + while( pbSrc < pbSrcEnd ) + { + SYMCRYPT_ASSERT( pbSrc <= pbSrcEnd - blockSize ); // help PreFast + memcpy( ciphertext, pbSrc, blockSize ); + (*pBlockCipher->decryptFunc)( pExpandedKey, ciphertext, tmp ); + SymCryptXorBytes( tmp, chain, pbDst, blockSize ); + memcpy( chain, ciphertext, blockSize ); + pbDst += blockSize; + pbSrc += blockSize; + } + + memcpy( pbChainingValue, chain, blockSize ); + + SymCryptWipeKnownSize( buf, sizeof( buf )); +} + +VOID +SYMCRYPT_CALL +SymCryptCbcMac( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + SIZE_T cbData ) +{ + SYMCRYPT_ALIGN BYTE buf[32]; + SIZE_T blockSize; + PCBYTE pbSrcEnd; + PCBYTE p; + + if( pBlockCipher->cbcMacFunc != NULL ) + { + // + // Use optimized implementation if available + // + (*pBlockCipher->cbcMacFunc)( pExpandedKey, pbChainingValue, pbSrc, cbData ); + return; + } + + blockSize = pBlockCipher->blockSize; + SYMCRYPT_ASSERT( blockSize <= SYMCRYPT_MAX_BLOCK_SIZE ); + + // + // Compute the end of the data, rounding the size down to a multiple of the block size. + // + pbSrcEnd = &pbSrc[ cbData & ~(blockSize - 1) ]; + + // + // We keep the chaining state in a local buffer to enforce the read-once write-once rule. + // It also improves memory locality. 
+ // + memcpy( buf, pbChainingValue, blockSize ); + p = pbSrc; + while( p < pbSrcEnd ) + { + SYMCRYPT_ASSERT( p <= pbSrc + cbData - blockSize ); + SymCryptXorBytes( p, buf, buf, blockSize ); + (*pBlockCipher->encryptFunc)( pExpandedKey, buf, buf ); + p += blockSize; + } + + memcpy( pbChainingValue, buf, blockSize ); + + SymCryptWipeKnownSize( buf, sizeof( buf )); +} + +VOID +SYMCRYPT_CALL +SymCryptCtrMsb32( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SYMCRYPT_ALIGN BYTE buf[2 * SYMCRYPT_MAX_BLOCK_SIZE]; + PBYTE count = &buf[0]; + PBYTE keystream= &buf[SYMCRYPT_MAX_BLOCK_SIZE]; + SIZE_T blockSize; + PCBYTE pbSrcEnd; + + blockSize = pBlockCipher->blockSize; + SYMCRYPT_ASSERT( blockSize <= SYMCRYPT_MAX_BLOCK_SIZE ); + + // + // Compute the end of the data, rounding the size down to a multiple of the block size. + // + pbSrcEnd = &pbSrc[ cbData & ~(blockSize - 1) ]; + + // + // We keep the chaining state in a local buffer to enforce the read-once write-once rule. + // It also improves memory locality. + // + #pragma warning(suppress: 22105) + memcpy( count, pbChainingValue, blockSize ); + while( pbSrc < pbSrcEnd ) + { + SYMCRYPT_ASSERT( pbSrc <= pbSrcEnd - blockSize ); // help PreFast + (*pBlockCipher->encryptFunc)( pExpandedKey, count, keystream ); + SymCryptXorBytes( keystream, pbSrc, pbDst, blockSize ); + + // + // We only need to increment the last 32 bits of the counter value. 
+ // + SYMCRYPT_STORE_MSBFIRST32( &count[ blockSize-4 ], 1 + SYMCRYPT_LOAD_MSBFIRST32( &count[ blockSize-4 ] ) ); + + pbSrc += blockSize; + pbDst += blockSize; + } + + memcpy( pbChainingValue, count, blockSize ); + + SymCryptWipeKnownSize( buf, sizeof( buf )); +} + +VOID +SYMCRYPT_CALL +SymCryptCtrMsb64( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SYMCRYPT_ALIGN BYTE buf[2 * SYMCRYPT_MAX_BLOCK_SIZE]; + PBYTE count = &buf[0]; + PBYTE keystream= &buf[SYMCRYPT_MAX_BLOCK_SIZE]; + SIZE_T blockSize; + PCBYTE pbSrcEnd; + + if( pBlockCipher->ctrMsb64Func != NULL ) + { + // + // Use optimized implementation if available + // + (*pBlockCipher->ctrMsb64Func)( pExpandedKey, pbChainingValue, pbSrc, pbDst, cbData ); + return; + } + + blockSize = pBlockCipher->blockSize; + SYMCRYPT_ASSERT( blockSize <= SYMCRYPT_MAX_BLOCK_SIZE ); + + // + // Compute the end of the data, rounding the size down to a multiple of the block size. + // + pbSrcEnd = &pbSrc[ cbData & ~(blockSize - 1) ]; + + // + // We keep the chaining state in a local buffer to enforce the read-once write-once rule. + // It also improves memory locality. + // + #pragma warning(suppress: 22105) + memcpy( count, pbChainingValue, blockSize ); + while( pbSrc < pbSrcEnd ) + { + SYMCRYPT_ASSERT( pbSrc <= pbSrcEnd - blockSize ); // help PreFast + (*pBlockCipher->encryptFunc)( pExpandedKey, count, keystream ); + SymCryptXorBytes( keystream, pbSrc, pbDst, blockSize ); + + // + // We only need to increment the last 64 bits of the counter value. 
+ // + SYMCRYPT_STORE_MSBFIRST64( &count[ blockSize-8 ], 1 + SYMCRYPT_LOAD_MSBFIRST64( &count[ blockSize-8 ] ) ); + + pbSrc += blockSize; + pbDst += blockSize; + } + + memcpy( pbChainingValue, count, blockSize ); + + SymCryptWipeKnownSize( buf, sizeof( buf )); +} + +VOID +SYMCRYPT_CALL +SymCryptCfbEncrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + SIZE_T cbShift, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +// +// Encrypt a buffer using the CFB cipher mode. +// +// This implements the CFB mode using a 1-byte feedback shift. +// This requires a block cipher encryption call for each byte, which is very slow. +// Use of this cipher mode is not recommended. +// +// - pBlockCipher is a pointer to the block cipher description table. +// Suitable description tables for all ciphers in this library have been pre-defined. +// - pExpandedKey points to the expanded key to use. This generic function uses PVOID so there +// is no type safety to ensure that the expanded key and the encryption function match. +// - pbChainingValue points to the chaining value. On entry and exit it +// contains the last blockSize ciphertext bytes. +// - pbSrc is the input data buffer that will be encrypted/decrypted. +// - cbData. Number of bytes to encrypt/decrypt. This must be a multiple of the block size. +// - pbDst is the output buffer that receives the encrypted/decrypted data. The input and output +// buffers may be the same or non-overlapping, but may not partially overlap. 
+// +{ + SYMCRYPT_ALIGN BYTE buf[2*SYMCRYPT_MAX_BLOCK_SIZE]; + PBYTE chain = &buf[0]; + PBYTE tmp = &buf[SYMCRYPT_MAX_BLOCK_SIZE]; + SIZE_T blockSize; + + blockSize = pBlockCipher->blockSize; + SYMCRYPT_ASSERT( blockSize <= SYMCRYPT_MAX_BLOCK_SIZE ); + + // Force cbShift to either be 1 or blockSize + if(cbShift != 1) + { + cbShift = blockSize; + } + + memcpy( chain, pbChainingValue, blockSize ); + while( cbData >= cbShift ) + { + (*pBlockCipher->encryptFunc)( pExpandedKey, chain, tmp ); + SymCryptXorBytes( pbSrc, tmp, tmp, cbShift ); // tmp[0..cbShift-1] ^= pbSrc[0..cbShift-1] + memcpy( pbDst, tmp, cbShift ); + + memmove( chain, chain + cbShift, blockSize - cbShift ); + memcpy( chain + blockSize - cbShift, tmp, cbShift ); + + pbDst += cbShift; + pbSrc += cbShift; + cbData -= cbShift; + } + + memcpy( pbChainingValue, chain, blockSize ); +} + + +VOID +SYMCRYPT_CALL +SymCryptCfbDecrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + SIZE_T cbShift, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SYMCRYPT_ALIGN BYTE buf[2*SYMCRYPT_MAX_BLOCK_SIZE]; + PBYTE chain = &buf[0]; + PBYTE tmp = &buf[SYMCRYPT_MAX_BLOCK_SIZE]; + SIZE_T blockSize; + + blockSize = pBlockCipher->blockSize; + SYMCRYPT_ASSERT( blockSize <= SYMCRYPT_MAX_BLOCK_SIZE ); + + // Force cbShift to either be 1 or blockSize + if(cbShift != 1) + { + cbShift = blockSize; + } + + memcpy( chain, pbChainingValue, blockSize ); + while( cbData >= cbShift ) + { + (*pBlockCipher->encryptFunc)( pExpandedKey, chain, tmp ); + + // + // First we update the chain block + // + + memmove( chain, chain + cbShift, blockSize - cbShift ); + memcpy( chain + blockSize - cbShift, pbSrc, cbShift ); + + // + // To obey the read-once rule, we take the ciphertext from the updated chain block. 
+ // + SymCryptXorBytes( chain + blockSize - cbShift, tmp, pbDst, cbShift ); + + pbDst += cbShift; + pbSrc += cbShift; + cbData -= cbShift; + } + + memcpy( pbChainingValue, chain, blockSize ); +} diff --git a/libs/symcrypt/lib/ccm.c b/libs/symcrypt/lib/ccm.c new file mode 100644 index 00000000000..912f657d10e --- /dev/null +++ b/libs/symcrypt/lib/ccm.c @@ -0,0 +1,634 @@ +// +// CCM.c implementation of the CCM block cipher mode +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#define CCM_MIN_NONCE_SIZE (7) +#define CCM_MAX_NONCE_SIZE (13) +#define CCM_MIN_TAG_SIZE (4) +#define CCM_MAX_TAG_SIZE (16) + +#define CCM_MAX_COUNTER_SIZE (SYMCRYPT_CCM_BLOCK_SIZE - 1 - CCM_MIN_NONCE_SIZE) + +#define AUTHDATA_16BIT_LIMIT ((1<<16) - (1<<8)) +#define AUTHDATA_32BIT_LIMIT (1ull << 32) + +// Compile time BOOL statically determines if we need to check cbAuthData < AUTHDATA_32BIT_LIMIT +// Used to suppress MSVC C4127 and clang Wtautological-constant-out-of-range-compare on 32b platforms +const BOOL fcbAuthDataLt32bitLimitStatic = SIZE_T_MAX < AUTHDATA_32BIT_LIMIT; + +#define CCM_BLOCK_MOD_MASK (SYMCRYPT_CCM_BLOCK_SIZE - 1) +#define CCM_BLOCK_ROUND_MASK (~CCM_BLOCK_MOD_MASK) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCcmValidateParameters( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ SIZE_T cbNonce, + _In_ SIZE_T cbAssociatedData, + _In_ UINT64 cbData, + _In_ SIZE_T cbTag + ) +{ + SIZE_T cbCounter; + + UNREFERENCED_PARAMETER( cbAssociatedData ); + + if( pBlockCipher->blockSize != SYMCRYPT_CCM_BLOCK_SIZE ) + { + return SYMCRYPT_WRONG_BLOCK_SIZE; + } + + // + // Test against limits in SP800-38C appendix A + // + if( cbNonce < CCM_MIN_NONCE_SIZE || cbNonce > CCM_MAX_NONCE_SIZE ) + { + return SYMCRYPT_WRONG_NONCE_SIZE; + } + + // + // cbAssociatedData is limited to <2^64 + // We don't test for this. None of our platforms has a SIZE_T that is + // large enough to violate this condition. 
And the test + // is of a form that the compiler cannot optimize away. + // + + // + // The counter block consists of a single flag byte, the nonce, and the counter field. + // + cbCounter = SYMCRYPT_CCM_BLOCK_SIZE - cbNonce - 1; + + // + // per SP800-38C cbData is limited to 2^{8*cbCounter} + // There is no way to do this test in a single comparison. + // We don't have to worry about side-channels in the && because + // cbCounter depends only on the length of the nonce, and we do not + // try to hide any lengths. + // + if( cbCounter < sizeof( UINT64 ) && + cbData >= ((UINT64)1 << (8*cbCounter)) ) + { + return SYMCRYPT_WRONG_DATA_SIZE; + } + + if( cbTag < CCM_MIN_TAG_SIZE || + cbTag > CCM_MAX_TAG_SIZE || + (cbTag & 1) == 1 // valid tag lengths are [4, 6, 8, ..., 16] + ) + { + return SYMCRYPT_WRONG_TAG_SIZE; + } + + return SYMCRYPT_NO_ERROR; +} + + + +VOID +SYMCRYPT_CALL +SymCryptCcmEncryptDecryptPart( + _Inout_ PSYMCRYPT_CCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) + +{ + SIZE_T cbToDo = cbData; + SIZE_T bytesToProcess; + + // + // Use any left-over key stream + // + while( (pState->bytesProcessed & CCM_BLOCK_MOD_MASK) != 0 && cbToDo > 0 ) + { + *pbDst = *pbSrc ^ pState->keystreamBlock[ pState->bytesProcessed & CCM_BLOCK_MOD_MASK ]; + pbDst++; + pbSrc++; + cbToDo--; + pState->bytesProcessed++; + } + + // + // Bulk process the main part of the input and output + // + if( cbToDo >= SYMCRYPT_CCM_BLOCK_SIZE ) + { + bytesToProcess = cbToDo & CCM_BLOCK_ROUND_MASK; + SYMCRYPT_ASSERT( bytesToProcess <= cbToDo ); + + SYMCRYPT_ASSERT( pState->pBlockCipher->blockSize == SYMCRYPT_CCM_BLOCK_SIZE ); + SymCryptCtrMsb64( pState->pBlockCipher, + pState->pExpandedKey, + &pState->counterBlock[0], + pbSrc, + pbDst, + bytesToProcess ); + pbSrc += bytesToProcess; + pbDst += bytesToProcess; + pState->bytesProcessed += bytesToProcess; + cbToDo -= bytesToProcess; + } + + if( cbToDo > 0 ) + { + // + // Encrypt an all-zero 
//
// SymCryptCcmAddMacData
//
// Absorb cbData bytes at pbData into the running CBC-MAC held in pState->macBlock.
//
// pState->bytesInMacBlock counts how many bytes have already been XORed into macBlock
// since the last block cipher call. The function works in three phases:
//   1. top up a partially filled macBlock and encrypt it once it is full,
//   2. hand all remaining whole blocks to SymCryptCbcMac,
//   3. XOR any leftover bytes into the start of macBlock and record their count, leaving
//      the encryption to a later call.
//
VOID
SYMCRYPT_CALL
SymCryptCcmAddMacData(
    _Inout_                 PSYMCRYPT_CCM_STATE pState,
    _In_reads_( cbData )    PCBYTE              pbData,
                            SIZE_T              cbData )
{
    SIZE_T bytesToProcess;

    // Phase 1: continue a partially filled block.
    if( pState->bytesInMacBlock > 0 )
    {
        bytesToProcess = SYMCRYPT_MIN( cbData, SYMCRYPT_CCM_BLOCK_SIZE - pState->bytesInMacBlock );
        SymCryptXorBytes( &pState->macBlock[pState->bytesInMacBlock], pbData, &pState->macBlock[pState->bytesInMacBlock], bytesToProcess );
        pbData += bytesToProcess;
        cbData -= bytesToProcess;
        pState->bytesInMacBlock += bytesToProcess;

        if( pState->bytesInMacBlock == SYMCRYPT_CCM_BLOCK_SIZE )
        {
            pState->pBlockCipher->encryptFunc( pState->pExpandedKey, &pState->macBlock[0], &pState->macBlock[0] );
            pState->bytesInMacBlock = 0;
        }
    }

    // Phase 2: bulk-process whole blocks. If phase 1 left the block still partial, it
    // consumed all of the input, so cbData is 0 here and this phase is skipped.
    if( cbData >= SYMCRYPT_CCM_BLOCK_SIZE )
    {
        bytesToProcess = cbData & CCM_BLOCK_ROUND_MASK;
        SYMCRYPT_ASSERT( pState->pBlockCipher->blockSize == SYMCRYPT_CCM_BLOCK_SIZE );

        SymCryptCbcMac( pState->pBlockCipher,
                        pState->pExpandedKey,
                        &pState->macBlock[0],
                        pbData,
                        bytesToProcess );

        pbData += bytesToProcess;
        cbData -= bytesToProcess;
    }

    // Phase 3: start a new partial block with whatever is left (fewer than one block).
    if( cbData > 0 )
    {
        SymCryptXorBytes( &pState->macBlock[0], pbData, &pState->macBlock[0], cbData );
        pState->bytesInMacBlock = cbData;
    }
}
+ // The data is xorred into macBlock, so we don't have to update that. + // All we do is apply the block cipher if there was any data remaining in the macBlock. + // + if( pState->bytesInMacBlock > 0 ) + { + pState->pBlockCipher->encryptFunc( pState->pExpandedKey, &pState->macBlock[0], &pState->macBlock[0] ); + pState->bytesInMacBlock = 0; + } +} + + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptCcmEncrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag ) +{ + SYMCRYPT_CCM_STATE state; + + SymCryptCcmInit( &state, + pBlockCipher, + pExpandedKey, + pbNonce, cbNonce, + pbAuthData, cbAuthData, + cbData, cbTag ); + + SymCryptCcmEncryptPart( &state, pbSrc, pbDst, cbData ); + + SymCryptCcmEncryptFinal( &state, pbTag, cbTag ); +} + +SYMCRYPT_NOINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCcmDecrypt( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ) +{ + SYMCRYPT_CCM_STATE state; + SYMCRYPT_ERROR status; + + SymCryptCcmInit( &state, + pBlockCipher, + pExpandedKey, + pbNonce, cbNonce, + pbAuthData, cbAuthData, + cbData, cbTag ); + + + SymCryptCcmDecryptPart( &state, pbSrc, pbDst, cbData ); + + status = SymCryptCcmDecryptFinal( &state, pbTag, cbTag ); + + // + // If we failed for any reason we wipe our output buffer to avoid returning + // decrypted but unauthenticated data. 
//
// SymCryptCcmInit
//
// Initialize a CCM state for one message: records the cipher, key and lengths in pState,
// builds the initial MAC block and counter block from the nonce and lengths, encrypts the
// first MAC block, and then absorbs the encoded associated-data length followed by the
// associated data itself (zero-padded to a block boundary).
//
// Parameters are only validated through SYMCRYPT_ASSERT; see the review note at the use
// of tmpBuf below.
//
SYMCRYPT_NOINLINE
VOID
SYMCRYPT_CALL
SymCryptCcmInit(
    _Out_                           PSYMCRYPT_CCM_STATE     pState,
    _In_                            PCSYMCRYPT_BLOCKCIPHER  pBlockCipher,
    _In_                            PCVOID                  pExpandedKey,
    _In_reads_( cbNonce )           PCBYTE                  pbNonce,
                                    SIZE_T                  cbNonce,
    _In_reads_opt_( cbAuthData )    PCBYTE                  pbAuthData,
                                    SIZE_T                  cbAuthData,
                                    UINT64                  cbData,
                                    SIZE_T                  cbTag )
{
    BYTE    flags;
    BYTE    tmpBuf[ SYMCRYPT_CCM_BLOCK_SIZE ];  // scratch for big-endian length encodings
    SIZE_T  cbCounter;

    SYMCRYPT_SET_MAGIC( pState );

    //
    // Validate parameters in checked builds
    //
    SYMCRYPT_ASSERT( SymCryptCcmValidateParameters( pBlockCipher, cbNonce, cbAuthData, cbData, cbTag ) == SYMCRYPT_NO_ERROR );


    //
    // compute # bytes in the counter field
    // We limit cbNonce to 15 so that cbCounter + cbNonce = 15 will always hold
    // This is much cheaper than full parameter validation, and it is enough to
    // avoid any buffer overflows.
    //
    cbNonce &= SYMCRYPT_CCM_BLOCK_SIZE - 1;
    cbCounter = SYMCRYPT_CCM_BLOCK_SIZE - 1 - cbNonce;

    pState->pBlockCipher = pBlockCipher;
    pState->pExpandedKey = pExpandedKey;
    pState->cbNonce = cbNonce;
    pState->cbData = cbData;
    pState->cbTag = cbTag;
    pState->cbCounter = cbCounter;
    pState->bytesProcessed = 0;
    pState->bytesInMacBlock = 0;

    //
    // Build the initial blocks for authentication and en/decryption
    //
    // Per Sp800-38c the flag byte is made up of four fields:
    //  Bits 0-2 are cbCounter - 1
    //  Bits 3-5 are (cbTag-2)/2
    //  Bit 6 is 1 if cbAuthData > 0
    //  Bit 7 is reserved and set to 0.
    flags = (BYTE) (pState->cbCounter - 1);
    flags |= ((cbTag-2)/2) << 3;
    if( cbAuthData > 0 )
    {
        //
        // No side-channel concerns with this if statements as we don't try to hide the
        // data length or presence of authdata.
        //
        flags |= (1 << 6);
    }


    //
    // The MAC starting block consists of three fields:
    // the flag byte, the nonce, and cbData encoded into cbCounter bytes.
    //
    // cbData is written big-endian into tmpBuf[0..7] and its low cbCounter bytes are
    // copied from there.
    // NOTE(review): the index 8 - cbCounter is only in range when cbCounter <= 8, i.e.
    // cbNonce >= 7. That bound comes from the parameter validation above, which is only
    // asserted; the cbNonce masking by itself keeps the writes to macBlock in bounds but
    // does not bound this read from tmpBuf. Confirm callers validate the nonce size.
    //
    pState->macBlock[0] = flags;
    memcpy( &pState->macBlock[1], pbNonce, cbNonce );
    SYMCRYPT_STORE_MSBFIRST64( &tmpBuf[0], cbData );
    memcpy( &pState->macBlock[1+cbNonce], &tmpBuf[ 8 - cbCounter ], cbCounter );

    //
    // The counter block is similar in layout, but with two changes:
    //  Bits 3-7 of the flag bytes are set to 0.
    //  The counter field is set to one (first counter value used for data encryption).
    // Wiping the whole block first is probably faster, as the size is known and the
    // block is aligned.
    // We also copy the nonce from the mac block to follow the read-once rule.
    //
    SymCryptWipeKnownSize( &pState->counterBlock[0], SYMCRYPT_CCM_BLOCK_SIZE );
    pState->counterBlock[0] = (BYTE)(flags & 0x7);
    memcpy( &pState->counterBlock[1], &pState->macBlock[1], cbNonce );
    pState->counterBlock[ SYMCRYPT_CCM_BLOCK_SIZE - 1] = 1;

    //
    // Encrypt the current MAC block; our CBC convention is to do the encryption
    // as soon as we have enough data.
    //
    pBlockCipher->encryptFunc( pExpandedKey, &pState->macBlock[0], &pState->macBlock[0] );

    //
    // Next we process the associated data
    // See the CCM specs for the details
    //
    // The length prefix has one of three encodings depending on the size of cbAuthData;
    // it is fed into the MAC ahead of the associated data itself.
    //
    if( cbAuthData <= 0 )
    {
        //
        // cbAuthData == 0, nothing needs to be done.
        //
    } else if( cbAuthData < AUTHDATA_16BIT_LIMIT )
    {
        //
        // 16-bit length encoding.
        //
        SYMCRYPT_STORE_MSBFIRST16( &tmpBuf[0], (UINT16) cbAuthData );
        SymCryptCcmAddMacData( pState, &tmpBuf[0], 2 );
    } else if( fcbAuthDataLt32bitLimitStatic || cbAuthData < AUTHDATA_32BIT_LIMIT )
    {
        //
        // 32-bit length
        //
        // fcbAuthDataLt32bitLimitStatic short-circuits the comparison on platforms where
        // SIZE_T cannot reach the 32-bit limit.
        //
        tmpBuf[0] = 0xff;
        tmpBuf[1] = 0xfe;       // Magic prefix as per SP 800-38c
        SYMCRYPT_STORE_MSBFIRST32( &tmpBuf[2], (UINT32) cbAuthData );
        SymCryptCcmAddMacData( pState, &tmpBuf[0], 2 + sizeof( UINT32 ) );
    } else
    {
        //
        // 64-bit length
        //
        tmpBuf[0] = 0xff;
        tmpBuf[1] = 0xff;       // Magic prefix as per SP 800-38c
        SYMCRYPT_STORE_MSBFIRST64( &tmpBuf[2], cbAuthData );
        SymCryptCcmAddMacData( pState, &tmpBuf[0], 2 + sizeof( UINT64 ) );
    }

    // With cbAuthData == 0 both calls below leave the MAC state unchanged: AddMacData sees
    // no data and PadMacData only acts when a partial block is pending.
    SymCryptCcmAddMacData( pState, pbAuthData, cbAuthData );
    SymCryptCcmPadMacData( pState );        // Pad MAC data with zeroes until the next block size boundary

}
+ // + SymCryptCcmAddMacData( pState, pbSrc, cbData ); + + SymCryptCcmEncryptDecryptPart( pState, pbSrc, pbDst, cbData ); + +} + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptCcmEncryptFinal( + _Inout_ PSYMCRYPT_CCM_STATE pState, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag ) +{ + // + // Check invariants in checked builds + // + SYMCRYPT_CHECK_MAGIC( pState ); + + SYMCRYPT_ASSERT( cbTag == pState->cbTag && pState->bytesProcessed == pState->cbData ); + + + SymCryptCcmPadMacData( pState ); + + // + // Set the counter value to zero to get the counter value that encrypts the tag, + // and then encrypt the tag. + // We reset bytesProcessed so that the partial encrypt/decrypt function will do the right thing + // + SymCryptWipe( &pState->counterBlock[1 + pState->cbNonce], pState->cbCounter ); + + pState->bytesProcessed = 0; + + SymCryptCcmEncryptDecryptPart( pState, &pState->macBlock[0], &pState->macBlock[0], SYMCRYPT_CCM_BLOCK_SIZE ); + + memcpy( pbTag, &pState->macBlock[0], cbTag ); + + SymCryptWipeKnownSize( pState, sizeof( *pState ) ); + SYMCRYPT_ASSERT( pState->bytesInMacBlock == 0 ); +} + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptCcmDecryptPart( + _Inout_ PSYMCRYPT_CCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + UINT64 bytesProcessedAfterThisCall; + + SYMCRYPT_CHECK_MAGIC( pState ); + + bytesProcessedAfterThisCall = cbData + pState->bytesProcessed; + + SYMCRYPT_ASSERT( bytesProcessedAfterThisCall >= cbData && + bytesProcessedAfterThisCall <= pState->cbData ); + + + // + // We are violating the read-once/write-once implementation rule here. + // We write the decrypted data and then read it back for the authentication function. + // In this particular situation this is safe to do. + // + // Anyone who can access the memory space that contains the source and destination of this + // function can recover the key stream used for this (key,nonce) combination. 
+ // We can think of the decryption function as merely exposing the key stream, and then the + // caller picking the ciphertext (and by implication the plaintext) to be authenticated. + // Thus the data we read during authentication is the 'real' plaintext, and the + // decryption function merely made the key stream available. + // + // Note that this would not be safe in general, it is only safe because CTR mode decryption already + // reveals the key stream. + // + SymCryptCcmEncryptDecryptPart( pState, pbSrc, pbDst, cbData ); + SymCryptCcmAddMacData( pState, pbDst, cbData ); + +} + +SYMCRYPT_NOINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptCcmDecryptFinal( + _Inout_ PSYMCRYPT_CCM_STATE pState, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ) +{ + SYMCRYPT_ERROR status; + + // + // Check invariants in checked builds + // + SYMCRYPT_CHECK_MAGIC( pState ); + + SYMCRYPT_ASSERT( cbTag == pState->cbTag && pState->bytesProcessed == pState->cbData ); + + SymCryptCcmPadMacData( pState ); + + // + // Set the counter value to zero to get the counter value that encrypts the tag, + // and then encrypt the tag + // We reset bytesProcessed so that the partial encrypt/decrypt function will do the right thing + // + SymCryptWipe( &pState->counterBlock[1 + pState->cbNonce], pState->cbCounter ); + + pState->bytesProcessed = 0; + + SymCryptCcmEncryptDecryptPart( pState, &pState->macBlock[0], &pState->macBlock[0], SYMCRYPT_CCM_BLOCK_SIZE ); + + if( !SymCryptEqual( pbTag, &pState->macBlock[0], cbTag ) ) + { + status = SYMCRYPT_AUTHENTICATION_FAILURE; + } + else + { + status = SYMCRYPT_NO_ERROR; + } + + SymCryptWipeKnownSize( pState, sizeof( *pState ) ); + SYMCRYPT_ASSERT( pState->bytesInMacBlock == 0 ); + + return status; +} + + +static const BYTE SymCryptCcmSelftestResult[3 + SYMCRYPT_AES_BLOCK_SIZE ] = +{ + 0x42, 0xd7, 0xda, + 0x3d, 0x9e, 0x95, 0x82, 0x29, 0x3c, 0x10, 0x9c, 0xa3, 0x39, 0x31, 0x3f, 0x18, 0xf3, 0x10, 0xf6 +}; + +VOID +SYMCRYPT_CALL +SymCryptCcmSelftest(void) +{ +
BYTE buf[ 3 + SYMCRYPT_AES_BLOCK_SIZE ]; + SYMCRYPT_AES_EXPANDED_KEY key; + SYMCRYPT_ERROR err; + + if( SymCryptAesExpandKey( &key, SymCryptTestKey32, 16 ) != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'ccm0' ); + } + + SymCryptCcmEncrypt( SymCryptAesBlockCipher, + &key, + &SymCryptTestKey32[16], 12, + NULL, 0, + &SymCryptTestMsg3[0], buf, 3, + &buf[3], SYMCRYPT_AES_BLOCK_SIZE ); + + SymCryptInjectError( buf, sizeof( buf ) ); + if( memcmp( buf, SymCryptCcmSelftestResult, sizeof( buf ) ) != 0 ) + { + SymCryptFatal( 'ccm1' ); + } + + // inject error into the ciphertext or tag + SymCryptInjectError( buf, sizeof( buf ) ); + + err = SymCryptCcmDecrypt( SymCryptAesBlockCipher, + &key, + &SymCryptTestKey32[16], 12, + NULL, 0, + buf, buf, 3, + &buf[3], SYMCRYPT_AES_BLOCK_SIZE ); + + SymCryptInjectError( buf, 3 ); + + if( err != SYMCRYPT_NO_ERROR || memcmp( buf, SymCryptTestMsg3, 3 ) != 0 ) + { + SymCryptFatal( 'ccm2' ); + } + +} diff --git a/libs/symcrypt/lib/chacha20.c b/libs/symcrypt/lib/chacha20.c new file mode 100644 index 00000000000..4841babea3c --- /dev/null +++ b/libs/symcrypt/lib/chacha20.c @@ -0,0 +1,267 @@ +// +// ChaCha20.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +VOID +SYMCRYPT_CALL +SymCryptChaCha20CryptBlocks( + _Inout_ PSYMCRYPT_CHACHA20_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// Encrypt Src to Dst using whole blocks, starting at block floor(pState->offset/64). 
+// # blocks processed is floor( cbData / 64 ) +// pState->offset point is updated by 64 for each block encrypted + + + +#define OFFSET_MASK (((UINT64)1 << 38) - 1) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptChaCha20Init( + _Out_ PSYMCRYPT_CHACHA20_STATE pState, + _In_reads_( cbKey ) PCBYTE pbKey, + _In_ SIZE_T cbKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + UINT64 offset ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + if (cbKey != 32) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + if (cbNonce != 12) + { + scError = SYMCRYPT_WRONG_NONCE_SIZE; + goto cleanup; + } + + SymCryptLsbFirstToUint32( pbKey, &pState->key[0], 8 ); + SymCryptLsbFirstToUint32( pbNonce, &pState->nonce[0], 3 ); + + SymCryptChaCha20SetOffset( pState, offset ); + +cleanup: + return scError; +} + +VOID +SYMCRYPT_CALL +SymCryptChaCha20SetOffset( + _Inout_ PSYMCRYPT_CHACHA20_STATE pState, + UINT64 offset ) +{ + pState->offset = offset; + pState->keystreamBufferValid = FALSE; +} + +VOID +SYMCRYPT_CALL +SymCryptChaCha20Crypt( + _Inout_ PSYMCRYPT_CHACHA20_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + UINT32 blockOffset; + SIZE_T nBytes; + + blockOffset = pState->offset & 0x3f; + + // If the offset is in the middle of the block, we first crypt until the end + // of the block + if( blockOffset != 0 ) + { + if( !pState->keystreamBufferValid ) + { + // Generate a block of key stream + SymCryptWipe( &pState->keystream[0], 64 ); + SymCryptChaCha20CryptBlocks( pState, + &pState->keystream[0], + &pState->keystream[0], + 64 ); + pState->offset -= 64; // Don't update the offset yet + } + + nBytes = 64 - blockOffset; // # bytes in buffer starting at offset + if( cbData < nBytes ) + { + // We don't use the generated block to the end. The buffer will be valid + // at the end as the offset won't advance beyond the block. 
+ nBytes = cbData; + pState->keystreamBufferValid = TRUE; + } else { + // We'll use the rest of the generated block. After that the key stream + // buffer won't be valid as the offset will advance beyond it. + pState->keystreamBufferValid = FALSE; + } + + SymCryptXorBytes( pbSrc, &pState->keystream[ blockOffset ], pbDst, nBytes ); + pbSrc += nBytes; + pbDst += nBytes; + cbData -= nBytes; + pState->offset += nBytes; + } + + // Here: pbSrc, pbDst, cbData, and pState->offset all in sync + // and either cbData == 0 or offset is at a block boundary + + if( cbData >= 64 ) + { + nBytes = cbData & ~0x3f; + SymCryptChaCha20CryptBlocks( pState, pbSrc, pbDst, nBytes ); + pbSrc += nBytes; + pbDst += nBytes; + cbData -= nBytes; + } + + if( cbData > 0 ) + { + // Generate a block of key stream + SymCryptWipe( &pState->keystream[0], 64 ); + SymCryptChaCha20CryptBlocks( pState, + &pState->keystream[0], + &pState->keystream[0], + 64 ); + pState->offset -= 64; // Don't update the offset yet + pState->keystreamBufferValid = TRUE; + + SymCryptXorBytes( pbSrc, &pState->keystream[0], pbDst, cbData ); + pState->offset += cbData; + // The following updates are correct but not needed + // pbSrc += cbData; + // pbDst += cbData; + // cbData -= cbData; + } +} + +#define CHACHA_QUARTERROUND( a, b, c, d ) { \ + a += b; d ^= a; d = ROL32( d, 16 ); \ + c += d; b ^= c; b = ROL32( b, 12 ); \ + a += b; d ^= a; d = ROL32( d, 8 ); \ + c += d; b ^= c; b = ROL32( b, 7 ); \ +} + +VOID +SYMCRYPT_CALL +SymCryptChaCha20CryptBlocks( + _Inout_ PSYMCRYPT_CHACHA20_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + UINT32 counter; + UINT32 s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; + int i; + + counter = (UINT32)(pState->offset >> 6); + + while( cbData >= 64 ) + { + // Initialize the state + s0 = 0x61707865; + s1 = 0x3320646e; + s2 = 0x79622d32; + s3 = 0x6b206574; + s4 = pState->key[0]; + s5 = pState->key[1]; + s6 = 
pState->key[2]; + s7 = pState->key[3]; + s8 = pState->key[4]; + s9 = pState->key[5]; + s10 = pState->key[6]; + s11 = pState->key[7]; + s12 = counter; + s13 = pState->nonce[0]; + s14 = pState->nonce[1]; + s15 = pState->nonce[2]; + + for( i=0; i<10; i++ ) + { + CHACHA_QUARTERROUND( s0 , s4 , s8 , s12 ); + CHACHA_QUARTERROUND( s1 , s5 , s9 , s13 ); + CHACHA_QUARTERROUND( s2 , s6 , s10, s14 ); + CHACHA_QUARTERROUND( s3 , s7 , s11, s15 ); + + CHACHA_QUARTERROUND( s0 , s5 , s10, s15 ); + CHACHA_QUARTERROUND( s1 , s6 , s11, s12 ); + CHACHA_QUARTERROUND( s2 , s7 , s8 , s13 ); + CHACHA_QUARTERROUND( s3 , s4 , s9 , s14 ); + } + + s0 += 0x61707865; + s1 += 0x3320646e; + s2 += 0x79622d32; + s3 += 0x6b206574; + s4 += pState->key[0]; + s5 += pState->key[1]; + s6 += pState->key[2]; + s7 += pState->key[3]; + s8 += pState->key[4]; + s9 += pState->key[5]; + s10 += pState->key[6]; + s11 += pState->key[7]; + s12 += counter; + s13 += pState->nonce[0]; + s14 += pState->nonce[1]; + s15 += pState->nonce[2]; + + SYMCRYPT_STORE_LSBFIRST32( pbDst + 0, s0 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 0 ) ); + SYMCRYPT_STORE_LSBFIRST32( pbDst + 4, s1 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 4 ) ); + SYMCRYPT_STORE_LSBFIRST32( pbDst + 8, s2 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 8 ) ); + SYMCRYPT_STORE_LSBFIRST32( pbDst + 12, s3 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 12 ) ); + SYMCRYPT_STORE_LSBFIRST32( pbDst + 16, s4 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 16 ) ); + SYMCRYPT_STORE_LSBFIRST32( pbDst + 20, s5 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 20 ) ); + SYMCRYPT_STORE_LSBFIRST32( pbDst + 24, s6 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 24 ) ); + SYMCRYPT_STORE_LSBFIRST32( pbDst + 28, s7 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 28 ) ); + SYMCRYPT_STORE_LSBFIRST32( pbDst + 32, s8 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 32 ) ); + SYMCRYPT_STORE_LSBFIRST32( pbDst + 36, s9 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 36 ) ); + SYMCRYPT_STORE_LSBFIRST32( pbDst + 40, s10 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 40 ) ); + SYMCRYPT_STORE_LSBFIRST32( pbDst 
+ 44, s11 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 44 ) ); + SYMCRYPT_STORE_LSBFIRST32( pbDst + 48, s12 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 48 ) ); + SYMCRYPT_STORE_LSBFIRST32( pbDst + 52, s13 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 52 ) ); + SYMCRYPT_STORE_LSBFIRST32( pbDst + 56, s14 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 56 ) ); + SYMCRYPT_STORE_LSBFIRST32( pbDst + 60, s15 ^ SYMCRYPT_LOAD_LSBFIRST32( pbSrc + 60 ) ); + + counter ++; + // If counter overflows then the caller has encrypted more than 256GB of data with a single stream, which is + // called out as being insecure. It is the caller's responsibility to avoid this! + pbSrc += 64; + pbDst += 64; + cbData -= 64; + pState->offset += 64; + } +} + +static const BYTE chacha20KatAnswer[ 3 ] = { 0xb5, 0xe0, 0x54 }; + +VOID +SYMCRYPT_CALL +SymCryptChaCha20Selftest(void) +{ + BYTE buf[3]; + SYMCRYPT_CHACHA20_STATE state; + + SymCryptChaCha20Init( &state, + SymCryptTestKey32, sizeof( SymCryptTestKey32 ), + SymCryptTestMsg16, 12, + 0 ); + + SymCryptChaCha20Crypt( &state, SymCryptTestMsg3, buf, sizeof( buf ) ); + + SymCryptInjectError( buf, sizeof( buf ) ); + + if( memcmp( buf, chacha20KatAnswer, sizeof( buf )) != 0 ) + { + SymCryptFatal( 'Cha2' ); + } +} diff --git a/libs/symcrypt/lib/chacha20_poly1305.c b/libs/symcrypt/lib/chacha20_poly1305.c new file mode 100644 index 00000000000..5b46a6d209e --- /dev/null +++ b/libs/symcrypt/lib/chacha20_poly1305.c @@ -0,0 +1,257 @@ +// +// ChaCha20_Poly1305.c +// +// Copyright (c) Microsoft Corporation. 
+// + +#include "precomp.h" + +#define CHACHA20_POLY1305_MAX_DATA_SIZE (((1ull << 32) - 1) * 64) + +// Compile time BOOL statically determines if we need to check cbData > CHACHA20_POLY1305_MAX_DATA_SIZE +// Used to suppress MSVC C4127 and clang Wtautological-constant-out-of-range-compare on 32b platforms +const BOOL fcbDataLteMaxDataSizeStatic = SIZE_T_MAX <= CHACHA20_POLY1305_MAX_DATA_SIZE; + +VOID +SYMCRYPT_CALL +SymCryptChaCha20Poly1305ComputeTag( + _Inout_ PSYMCRYPT_POLY1305_STATE pState, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_POLY1305_RESULT_SIZE ) PBYTE pbTag ) +{ + SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_POLY1305_BLOCK_SIZE]; + BYTE partialBlockSize; + + SymCryptWipeKnownSize( buf, SYMCRYPT_POLY1305_BLOCK_SIZE ); + + // Add additional authentication data if needed. + if ( cbAuthData > 0 ) + { + SymCryptPoly1305Append( pState, pbAuthData, cbAuthData ); + + // Append zeros to make a complete Poly1305 block. + partialBlockSize = cbAuthData % SYMCRYPT_POLY1305_BLOCK_SIZE; + if ( partialBlockSize > 0 ) + { + SymCryptPoly1305Append( pState, buf, SYMCRYPT_POLY1305_BLOCK_SIZE - partialBlockSize ); + } + } + + // Add ciphertext if needed. + if ( cbData > 0 ) + { + SymCryptPoly1305Append( pState, pbData, cbData ); + + // Append zeros to make a complete Poly1305 block. + partialBlockSize = cbData % SYMCRYPT_POLY1305_BLOCK_SIZE; + if ( partialBlockSize > 0 ) + { + SymCryptPoly1305Append( pState, buf, SYMCRYPT_POLY1305_BLOCK_SIZE - partialBlockSize ); + } + } + + // Add length of additional authentication data and ciphertext. 
+ SYMCRYPT_STORE_LSBFIRST64( &buf[0], cbAuthData ); + SYMCRYPT_STORE_LSBFIRST64( &buf[8], cbData ); + SymCryptPoly1305Append( pState, buf, SYMCRYPT_POLY1305_BLOCK_SIZE ); + SymCryptWipeKnownSize( buf, SYMCRYPT_POLY1305_BLOCK_SIZE ); + + SymCryptPoly1305Result( pState, pbTag ); +} + +SYMCRYPT_NOINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptChaCha20Poly1305Encrypt( + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag ) +{ + SYMCRYPT_ERROR status = SYMCRYPT_NO_ERROR; + SYMCRYPT_CHACHA20_STATE ChaCha20State; + SYMCRYPT_POLY1305_STATE Poly1305State; + SYMCRYPT_ALIGN BYTE key[SYMCRYPT_POLY1305_KEY_SIZE]; + + if ( !fcbDataLteMaxDataSizeStatic && cbData > CHACHA20_POLY1305_MAX_DATA_SIZE ) + { + status = SYMCRYPT_WRONG_DATA_SIZE; + goto cleanup; + } + + if ( cbTag != SYMCRYPT_POLY1305_RESULT_SIZE ) + { + status = SYMCRYPT_WRONG_TAG_SIZE; + goto cleanup; + } + + status = SymCryptChaCha20Init( &ChaCha20State, pbKey, cbKey, pbNonce, cbNonce, 0 ); + if ( status != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Generate the first 32 bytes of keystream. + SymCryptWipeKnownSize( key, sizeof( key ) ); + SymCryptChaCha20Crypt( &ChaCha20State, key, key, sizeof ( key ) ); + + // Create the Poly1305 key using the first 32 bytes of the ChaCha20 keystream. + SymCryptPoly1305Init( &Poly1305State, key ); + SymCryptWipeKnownSize( key, sizeof( key ) ); + + // Encrypt data if needed. + if ( cbData > 0 ) + { + // Advance the keystream to counter 1 (offset 64) for data encryption. 
+ SymCryptChaCha20SetOffset( &ChaCha20State, 64 ); + SymCryptChaCha20Crypt( &ChaCha20State, pbSrc, pbDst, cbData ); + } + + // We read the ciphertext back, violating the general rule not to rely on I/O buffers + // as they can reside in a different security domain. For ChaCha20Poly1305, like GCM, + // this read-back of data is not a problem. An attacker with access to the buffer + // will get the ChaCha20 key stream plus the Poly1305 authenticator of a single value. + // As Poly1305 is strong even with attacker-controlled data, this is harmless. + SymCryptChaCha20Poly1305ComputeTag( &Poly1305State, pbAuthData, cbAuthData, + pbDst, cbData, pbTag ); +cleanup: + + SymCryptWipeKnownSize( &ChaCha20State, sizeof( ChaCha20State ) ); + SymCryptWipeKnownSize( &Poly1305State, sizeof( Poly1305State ) ); + + return status; +} + +SYMCRYPT_NOINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptChaCha20Poly1305Decrypt( + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ) +{ + SYMCRYPT_ERROR status = SYMCRYPT_NO_ERROR; + SYMCRYPT_CHACHA20_STATE ChaCha20State; + SYMCRYPT_POLY1305_STATE Poly1305State; + SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_POLY1305_RESULT_SIZE]; + SYMCRYPT_ALIGN BYTE key[SYMCRYPT_POLY1305_KEY_SIZE]; + + if ( !fcbDataLteMaxDataSizeStatic && cbData > CHACHA20_POLY1305_MAX_DATA_SIZE ) + { + status = SYMCRYPT_WRONG_DATA_SIZE; + goto cleanup; + } + + if ( cbTag != SYMCRYPT_POLY1305_RESULT_SIZE ) + { + status = SYMCRYPT_WRONG_TAG_SIZE; + goto cleanup; + } + + status = SymCryptChaCha20Init( &ChaCha20State, pbKey, cbKey, pbNonce, cbNonce, 0 ); + if ( status != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Generate the first 32 bytes of keystream. 
+ SymCryptWipeKnownSize( key, sizeof( key ) ); + SymCryptChaCha20Crypt( &ChaCha20State, key, key, sizeof( key ) ); + + // Create the Poly1305 key using the first 32 bytes of the ChaCha20 keystream. + SymCryptPoly1305Init( &Poly1305State, key ); + SymCryptWipeKnownSize( key, sizeof( key ) ); + + // We read the ciphertext back, violating the general rule not to rely on I/O buffers + // as they can reside in a different security domain. For ChaCha20Poly1305, like GCM, + // this read-back of data is not a problem. An attacker with access to the buffer + // will get the ChaCha20 key stream plus the Poly1305 authenticator of a single value. + // As Poly1305 is strong even with attacker-controlled data, this is harmless. + SymCryptChaCha20Poly1305ComputeTag( &Poly1305State, pbAuthData, cbAuthData, + pbSrc, cbData, buf ); + + // Validate tag. + if (!SymCryptEqual(pbTag, buf, cbTag)) + { + status = SYMCRYPT_AUTHENTICATION_FAILURE; + goto cleanup; + } + + // Decrypt data if needed. + if ( cbData > 0) + { + // Advance the keystream to counter 1 (offset 64) for data decryption. 
+ SymCryptChaCha20SetOffset( &ChaCha20State, 64 ); + SymCryptChaCha20Crypt( &ChaCha20State, pbSrc, pbDst, cbData ); + } + +cleanup: + + SymCryptWipeKnownSize( &ChaCha20State, sizeof( ChaCha20State ) ); + SymCryptWipeKnownSize( &Poly1305State, sizeof( Poly1305State ) ); + + return status; +} + + +static const BYTE SymCryptChaCha20Poly1305Result[3 + SYMCRYPT_POLY1305_RESULT_SIZE] = +{ + 0x5d, 0xba, 0x7b, + 0x80, 0x10, 0xd2, 0x05, 0x4a, 0xad, 0x53, 0x1f, 0xa2, 0xce, 0x83, 0xc1, 0x66, 0x12, 0x85, 0x21 +}; + +VOID +SYMCRYPT_CALL +SymCryptChaCha20Poly1305Selftest(void) +{ + BYTE buf[3 + SYMCRYPT_POLY1305_RESULT_SIZE]; + SYMCRYPT_ERROR err; + + if ( SymCryptChaCha20Poly1305Encrypt( SymCryptTestKey32, sizeof( SymCryptTestKey32 ), + SymCryptTestMsg16, 12, + SymCryptTestMsg16, sizeof( SymCryptTestMsg16 ), + &SymCryptTestMsg3[0], buf, 3, + &buf[3], SYMCRYPT_POLY1305_RESULT_SIZE ) != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'ccp0' ); + } + + SymCryptInjectError( buf, sizeof( buf ) ); + if ( memcmp( buf, SymCryptChaCha20Poly1305Result, sizeof( buf ) ) != 0 ) + { + SymCryptFatal( 'ccp1' ); + } + + // Inject error into the ciphertext or tag. + SymCryptInjectError( buf, sizeof( buf ) ); + + err = SymCryptChaCha20Poly1305Decrypt( SymCryptTestKey32, sizeof( SymCryptTestKey32 ), + SymCryptTestMsg16, 12, + SymCryptTestMsg16, sizeof( SymCryptTestMsg16 ), + buf, buf, 3, + &buf[3], SYMCRYPT_POLY1305_RESULT_SIZE ); + SymCryptInjectError( buf, 3 ); + + if ( err != SYMCRYPT_NO_ERROR || memcmp( buf, SymCryptTestMsg3, 3 ) != 0 ) + { + SymCryptFatal( 'ccp2' ); + } +} diff --git a/libs/symcrypt/lib/cpuid.c b/libs/symcrypt/lib/cpuid.c new file mode 100644 index 00000000000..cf31adfd5ae --- /dev/null +++ b/libs/symcrypt/lib/cpuid.c @@ -0,0 +1,419 @@ +// +// cpuid.c code for CPU feature detection based on CPUID +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + + +#include "precomp.h" + +#if (SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64) & SYMCRYPT_MS_VC +#include <excpt.h> +#endif + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("xsave"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("xsave") +#endif + +// +// RDRAND availability is signaled by CPUID.1.ecx[30] +// PCLMULQDQ availability is signaled by CPUID.1.ecx[1] +// AES_NI availability is signaled by CPUID.1.ecx[25] +// SSSE3 availability is signaled by CPUID.1.ecx[9] +// SSE3 availability is signaled by CPUID.1.ecx[0] +// SSE2 availability is signaled by CPUID.1.edx[26] +// + +#define CPUID_1_ECX_RDRAND_BIT 30 +#define CPUID_1_ECX_PCLMULQDQ_BIT 1 +#define CPUID_1_ECX_AESNI_BIT 25 +#define CPUID_1_ECX_SSSE3_BIT 9 +#define CPUID_1_ECX_SSE3_BIT 0 +#define CPUID_1_EDX_SSE2_BIT 26 +#define CPUID_1_EDX_SSE_BIT 25 +#define CPUID_1_ECX_AVX_BIT 28 +#define CPUID_1_ECX_CMPXCHG16B_BIT 13 +#define CPUID_70_EBX_AVX2_BIT 5 +#define CPUID_70_EBX_RDSEED_BIT 18 +#define CPUID_70_EBX_SHANI_BIT 29 +#define CPUID_70_EBX_ADX_BIT 19 +#define CPUID_70_EBX_BMI2_BIT 8 +#define CPUID_70_EBX_AVX512F_BIT 16 +#define CPUID_70_EBX_AVX512BW_BIT 30 +#define CPUID_70_EBX_AVX512DQ_BIT 17 +#define CPUID_70_EBX_AVX512VL_BIT 31 +#define CPUID_70_ECX_VAES_BIT 9 +#define CPUID_70_ECX_VPCLMULQDQ_BIT 10 + + +#define CPUID_1_ECX_OSXSAVE_BIT 27 + +typedef struct _CPUID_BIT_INFO { + BYTE leaf; + BYTE word; + BYTE bitno; + SYMCRYPT_CPU_FEATURES requiredBy; +} CPUID_BIT_INFO; + +#define WORD_EAX 0 +#define WORD_EBX 1 +#define WORD_ECX 2 +#define WORD_EDX 3 + +int g_SymCryptCpuid1[4]; // We cache the results of CPUID(1) to help diagnose CPU detection errors + +const +CPUID_BIT_INFO cpuidBitInfo[] = { + {1, WORD_ECX, CPUID_1_ECX_RDRAND_BIT, SYMCRYPT_CPU_FEATURE_RDRAND }, + {1, WORD_ECX, CPUID_1_ECX_PCLMULQDQ_BIT, SYMCRYPT_CPU_FEATURE_PCLMULQDQ }, + {1, WORD_ECX, CPUID_1_ECX_AESNI_BIT, SYMCRYPT_CPU_FEATURE_AESNI }, + 
{1, WORD_EDX, CPUID_1_EDX_SSE_BIT, SYMCRYPT_CPU_FEATURE_SSE2 | SYMCRYPT_CPU_FEATURE_SSSE3 }, + {1, WORD_EDX, CPUID_1_EDX_SSE2_BIT, SYMCRYPT_CPU_FEATURE_SSE2 | SYMCRYPT_CPU_FEATURE_SSSE3 }, + {1, WORD_ECX, CPUID_1_ECX_SSE3_BIT, SYMCRYPT_CPU_FEATURE_SSSE3 }, + {1, WORD_ECX, CPUID_1_ECX_SSSE3_BIT, SYMCRYPT_CPU_FEATURE_SSSE3 }, + {1, WORD_ECX, CPUID_1_ECX_AVX_BIT, SYMCRYPT_CPU_FEATURE_AVX2 }, + {1, WORD_ECX, CPUID_1_ECX_CMPXCHG16B_BIT, SYMCRYPT_CPU_FEATURE_CMPXCHG16B }, + {7, WORD_EBX, CPUID_70_EBX_AVX2_BIT, SYMCRYPT_CPU_FEATURE_AVX2 }, + {7, WORD_EBX, CPUID_70_EBX_RDSEED_BIT, SYMCRYPT_CPU_FEATURE_RDSEED }, + {7, WORD_EBX, CPUID_70_EBX_SHANI_BIT, SYMCRYPT_CPU_FEATURE_SHANI }, + {7, WORD_EBX, CPUID_70_EBX_ADX_BIT, SYMCRYPT_CPU_FEATURE_ADX }, + {7, WORD_EBX, CPUID_70_EBX_BMI2_BIT, SYMCRYPT_CPU_FEATURE_BMI2 }, + {7, WORD_EBX, CPUID_70_EBX_AVX512F_BIT, SYMCRYPT_CPU_FEATURE_AVX512 }, + {7, WORD_EBX, CPUID_70_EBX_AVX512VL_BIT, SYMCRYPT_CPU_FEATURE_AVX512 }, + {7, WORD_EBX, CPUID_70_EBX_AVX512BW_BIT, SYMCRYPT_CPU_FEATURE_AVX512 }, + {7, WORD_EBX, CPUID_70_EBX_AVX512DQ_BIT, SYMCRYPT_CPU_FEATURE_AVX512 }, + {7, WORD_ECX, CPUID_70_ECX_VAES_BIT, SYMCRYPT_CPU_FEATURE_VAES }, + {7, WORD_ECX, CPUID_70_ECX_VPCLMULQDQ_BIT, SYMCRYPT_CPU_FEATURE_VAES }, +}; + +VOID +SYMCRYPT_CALL +SymCryptDetectCpuFeaturesByCpuid( UINT32 flags ) +{ + UINT32 result; + int CPUInfo[4]; + int InfoType; + int maxInfoType; + int i; + BOOLEAN allowYmm, allowZmm; + INT64 xGetBvResult; + + // + // Mark all features as present (the result bits indicate not-present, so set the features we know to 0). 
+ // + result = ~ (UINT32)( + SYMCRYPT_CPU_FEATURE_SSE2 | + SYMCRYPT_CPU_FEATURE_SSSE3 | + SYMCRYPT_CPU_FEATURE_AESNI | + SYMCRYPT_CPU_FEATURE_PCLMULQDQ | + SYMCRYPT_CPU_FEATURE_AVX2 | + SYMCRYPT_CPU_FEATURE_SHANI | + SYMCRYPT_CPU_FEATURE_BMI2 | + SYMCRYPT_CPU_FEATURE_ADX | + SYMCRYPT_CPU_FEATURE_RDRAND | + SYMCRYPT_CPU_FEATURE_RDSEED | + SYMCRYPT_CPU_FEATURE_AVX512 | + SYMCRYPT_CPU_FEATURE_VAES | + SYMCRYPT_CPU_FEATURE_CMPXCHG16B + ); + + // InfoType holds the function id of previous cpuid + // so we don't have to repeatedly invoke cpuid. + InfoType = 0; + SymCryptCpuidExFunc( CPUInfo, InfoType, 0 ); + maxInfoType = CPUInfo[WORD_EAX]; + + for( i=0; i< sizeof( cpuidBitInfo ) / sizeof( *cpuidBitInfo ); i++ ) + { + if( cpuidBitInfo[i].leaf != InfoType ) + { + InfoType = cpuidBitInfo[i].leaf; + SymCryptCpuidExFunc( CPUInfo, InfoType, 0 ); + } + if( cpuidBitInfo[i].leaf > maxInfoType || (CPUInfo[ cpuidBitInfo[i].word ] & (1UL << cpuidBitInfo[i].bitno) ) == 0 ) + { + result |= cpuidBitInfo[i].requiredBy; + } + } + + if( (flags & SYMCRYPT_CPUID_DETECT_FLAG_CHECK_OS_SUPPORT_FOR_YMM) != 0 ) + { + // + // Check for OS support of the YMM registers. + // This detection is optional in any environment because some environments are single-threaded, and + // OS support is not required. (E.g. Boot library.) + // + // We use the following logic: + // Check that the OSXSAVE bit is 1, which means we can use XGETBV + // Use XGETBV and check that XCR0[2:1] = '11b' signaling that both XMM and YMM are enabled by OS + // Note that we only disable the AVX2 usage; AESNI & XMM registers are used independent of OS support, because + // all our (known) OSes have it. 
+ // + allowYmm = FALSE; + allowZmm = FALSE; + SymCryptCpuidExFunc( CPUInfo, 1, 0 ); + + if( (CPUInfo[WORD_ECX] & (1 << CPUID_1_ECX_OSXSAVE_BIT)) != 0 ) + { + // OSXSAVE bit is set, we can use XGETBV + xGetBvResult = _xgetbv( _XCR_XFEATURE_ENABLED_MASK ); + + // Check that bits 1 and 2 are set, corresponding to the XMM and YMM register state + if( (xGetBvResult & 0x6) == 0x6) + { + allowYmm = TRUE; + + // + // For AVX-512, also check that bits 5, 6, and 7 are set, corresponding to the + // opmask, ZMM (0-15), and ZMM (16-31) register states + // This follows the recommendation in the Intel 64 and IA-32 Architectures Software + // Developer's Manual, Volume 1, 15.3 / 15.4. + // + // It seems plausible that on some system the OS would not support save/restore of + // AVX-512 state, but use of AVX-512VL instructions on Ymm or Xmm registers would be + // OK, however Intel explicitly suggests that we should only use AVX512-VL if the + // support is indicated by xgetbv, so we use the same logic as for AVX2 (our + // SymCrypt feature indicates both CPU support, and OS support for saving/restoring + // the extended state) + // + if( (xGetBvResult & 0xe0) == 0xe0) + { + allowZmm = TRUE; + } + } + } + + if( !allowYmm ) + { + // Disallow the AVX2-dependent code because we don't have OS YMM support. + result |= SYMCRYPT_CPU_FEATURE_AVX2; + } + + if( !allowZmm ) + { + // Disallow any AVX512-dependent code because we don't have OS ZMM support. + // Note that not all AVX-512 dependent code will need to save/restore ZMM state, but we + // do not support AVX-512 instructions (even acting on YMM or XMM registers), unless the + // OS indicates support via XCR0 + result |= SYMCRYPT_CPU_FEATURE_AVX512; + } + } + + + if( (result & SYMCRYPT_CPU_FEATURE_AESNI) == 0 ) // thus, if AES-NI is present according to CPUID + { + // + // In Win7 Beta we had an interesting crash bucket. 
+ // It only occurred on the AsusTek A6K line of laptops which sometimes + // set the cpuid AES-NI bit (but not always). This leads to a crash as + // we start using AES instructions that don't exist on those machines. + // + // I found on-line reviews for the A6K line from december 2005 so it was launched around + // that time. + // + // These laptops all have AMD CPUs, so we fix it by locking out the particular AMD CPUs + // families that don't have AES-NI anyway. + // + // We really shouldn't need this logic, and it only slows things down. + // We should be able to remove it at some point in the future. + // + // At AMD's recommendation, we use the logic below. + // The AMD engineers reviewed this code to ensure we don't lock out future CPUs + // that will have AES-NI. + // + SymCryptCpuidExFunc( CPUInfo, 0, 0 ); + if( CPUInfo[WORD_EBX] == 'htuA' + && CPUInfo[WORD_ECX] == 'DMAc' + && CPUInfo[WORD_EDX] == 'itne' ) + { + // + // We have an AMD cpu, check the family. + // + UINT32 baseFamily; + UINT32 extFamily; + UINT32 family; + + // + // Extract the base family and extended family values, and combine them to the full + // family value. 
+ // + SymCryptCpuidExFunc( CPUInfo, 1, 0 ); + + baseFamily = (CPUInfo[WORD_EAX] >> 8) & 0xf; + + extFamily = (CPUInfo[WORD_EAX] >> 20) & 0xff; + + if( baseFamily < 0xf ) + { + family = baseFamily; + } else { + family = baseFamily + extFamily; + } + + // + // AMD will not implement the AES instruction set until family 0x15 + // + if( family < 0x15 ) + { + result |= SYMCRYPT_CPU_FEATURE_AESNI; + } + } + } + + SymCryptCpuidExFunc( g_SymCryptCpuid1, 1, 0 ); // Keep cache of CPUID results for diagnosis + + g_SymCryptCpuFeaturesNotPresent = (SYMCRYPT_CPU_FEATURES) result; +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#elif SYMCRYPT_CPU_ARM && 0 + +#define CP15_ISAR5 15, 0, 0, 2, 5 // Instruction Set Attribute Register 5 + +#define READ_ARM_FEATURE(_FeatureRegister, _Index) \ + (((ULONG)_MoveFromCoprocessor(_FeatureRegister) >> ((_Index) * 4)) & 0xF) + +#define ISAR5_AES 1 +#define ISAR5_AES_AESE 1 +#define ISAR5_AES_PMULL 2 + +#define ISAR5_SHA2 3 +#define ISAR5_SHA2_SHA256H 1 + +#define ISAR5_CRC32 4 +#define ISAR5_CRC32_IMP 1 + +VOID +SYMCRYPT_CALL +SymCryptDetectCpuFeaturesFromRegisters(void) +{ + UINT32 result; + +#if 0 // We currently do not use any neon crypto features on ARM code, so no detection needed. + + // + // We start with a result that allows everything. + // This makes the code simpler when you have one CPU feature flag that disables multiple feature bits. + // + result = ~ (UINT32)( + SYMCRYPT_CPU_FEATURE_NEON | + SYMCRYPT_CPU_FEATURE_NEON_AES | + SYMCRYPT_CPU_FEATURE_NEON_PMULL | + SYMCRYPT_CPU_FEATURE_NEON_SHA256 + ); + + // + // Reading the status registers might fail, so we use a try block. 
+ // + try { + + if( READ_ARM_FEATURE(CP15_ISAR5, ISAR5_AES) < ISAR5_AES_AESE ) + { + result |= SYMCRYPT_CPU_FEATURE_NEON_AES; + } + + if( READ_ARM_FEATURE(CP15_ISAR5, ISAR5_AES) < ISAR5_AES_PMULL ) + { + result |= SYMCRYPT_CPU_FEATURE_NEON_PMULL; + } + + if( READ_ARM_FEATURE(CP15_ISAR5, ISAR5_SHA2) < ISAR5_SHA2_SHA256H ) + { + result |= SYMCRYPT_CPU_FEATURE_NEON_SHA256; + } + + } except(EXCEPTION_EXECUTE_HANDLER) { + // + // Something went wrong reading the registers; disable all the crypto extensions leaving only the standard NEON registers available. + // + result |= SYMCRYPT_CPU_FEATURE_NEON_AES | SYMCRYPT_CPU_FEATURE_NEON_PMULL | SYMCRYPT_CPU_FEATURE_NEON_SHA256; + } +#endif + // + // For now we ignore the new instructions in ARM until we can get clarity on how to detect Arm32-on-Arm64. + // + result = ~(UINT32)SYMCRYPT_CPU_FEATURE_NEON; + + g_SymCryptCpuFeaturesNotPresent = (SYMCRYPT_CPU_FEATURES) result; +} + + +#elif SYMCRYPT_CPU_ARM64 && 0 + + +#define ARM64_SYSREG(op0, op1, crn, crm, op2) \ + ( ((op0 & 1) << 14) | \ + ((op1 & 7) << 11) | \ + ((crn & 15) << 7) | \ + ((crm & 15) << 3) | \ + ((op2 & 7) << 0) ) + +#define ARM64_ID_AA64ISAR0_EL1 ARM64_SYSREG(3,0, 0, 6,0) // ISA Feature Register 0 + +#define ISAR0_AES 1 +#define ISAR0_AES_NI 0 +#define ISAR0_AES_INSTRUCTIONS 1 +#define ISAR0_AES_PLUS_PMULL64 2 + +#define ISAR0_SHA2 3 +#define ISAR0_SHA2_NI 0 +#define ISAR0_SHA2_INSTRUCTIONS 1 + +#define ISAR0_CRC32 4 +#define ISAR0_CRC32_NI 0 +#define ISAR0_CRC32_INSTRUCTIONS 1 + +#define READ_ARM64_FEATURE(_FeatureRegister, _Index) \ + (((ULONG64)_ReadStatusReg(_FeatureRegister) >> ((_Index) * 4)) & 0xF) + +VOID +SYMCRYPT_CALL +SymCryptDetectCpuFeaturesFromRegisters(void) +{ + UINT32 result; + + result = ~ (UINT32)( + SYMCRYPT_CPU_FEATURE_NEON | + SYMCRYPT_CPU_FEATURE_NEON_AES | + SYMCRYPT_CPU_FEATURE_NEON_PMULL | + SYMCRYPT_CPU_FEATURE_NEON_SHA256 + ); + +#if SYMCRYPT_MS_VC + __try { + + if( READ_ARM64_FEATURE(ARM64_ID_AA64ISAR0_EL1, ISAR0_AES) < 
ISAR0_AES_INSTRUCTIONS ) + { + result |= SYMCRYPT_CPU_FEATURE_NEON_AES; + } + + if( READ_ARM64_FEATURE(ARM64_ID_AA64ISAR0_EL1, ISAR0_AES) < ISAR0_AES_PLUS_PMULL64 ) + { + result |= SYMCRYPT_CPU_FEATURE_NEON_PMULL; + } + + if( READ_ARM64_FEATURE(ARM64_ID_AA64ISAR0_EL1, ISAR0_SHA2) < ISAR0_SHA2_INSTRUCTIONS ) + { + result |= SYMCRYPT_CPU_FEATURE_NEON_SHA256; + } + + g_SymCryptCpuFeaturesNotPresent = (SYMCRYPT_CPU_FEATURES) result; + + } __except(EXCEPTION_EXECUTE_HANDLER) { + ; //NOTHING; + } +#endif + +} + +#endif // CPU arch selection diff --git a/libs/symcrypt/lib/cpuid_um.c b/libs/symcrypt/lib/cpuid_um.c new file mode 100644 index 00000000000..64e7e7d4fbe --- /dev/null +++ b/libs/symcrypt/lib/cpuid_um.c @@ -0,0 +1,131 @@ +// +// cpuid_um.c code for CPU feature detection based on OS features available in user-mode +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// This file contains the feature detection code that is only compiled for user-mode. +// The IsProcessorFeaturePresent API is only in UM, so linking any code out of +// a source file that contains a call to it doesn't work for KM code. +// By splitting it into a separate file, the code is ignored by KM callers because +// they never reference anything in this file. 
+//
+
+
+
+#include "precomp.h"
+
+#if SYMCRYPT_CPU_ARM64 && SYMCRYPT_PLATFORM_WINDOWS
+#undef UNREFERENCED_PARAMETER
+#include <processthreadsapi.h>
+
+// Value copied from winnt.h
+#define PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE 30
+
+//
+// Windows / Arm64 variant.
+// One IsProcessorFeaturePresent query decides whether the AES, PMULL and SHA256
+// feature bits are cleared from the "not present" mask; the NEON bit is cleared
+// in either case.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptDetectCpuFeaturesFromIsProcessorFeaturePresent(void)
+{
+    // Baseline: every feature except NEON is marked as not present.
+    SYMCRYPT_CPU_FEATURES notPresent = (SYMCRYPT_CPU_FEATURES) ~SYMCRYPT_CPU_FEATURE_NEON;
+
+    if( IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) )
+    {
+        notPresent = (SYMCRYPT_CPU_FEATURES) ~(
+                        SYMCRYPT_CPU_FEATURE_NEON |
+                        SYMCRYPT_CPU_FEATURE_NEON_AES |
+                        SYMCRYPT_CPU_FEATURE_NEON_PMULL |
+                        SYMCRYPT_CPU_FEATURE_NEON_SHA256 );
+    }
+
+    g_SymCryptCpuFeaturesNotPresent = notPresent;
+}
+
+#elif SYMCRYPT_CPU_ARM64 && SYMCRYPT_GNUC
+
+#if SYMCRYPT_PLATFORM_APPLE
+#include <sys/sysctl.h>
+
+//
+// Apple / Arm64 variant.
+// Each crypto extension is probed through its own sysctl key; a key that cannot be
+// read, or that reads as zero, leaves the corresponding feature marked not present.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptDetectCpuFeaturesFromIsProcessorFeaturePresent(void)
+{
+    // sysctl key -> feature bit that is cleared from the mask when the key reads non-zero
+    const struct {
+        const char *            szSysctlName;
+        SYMCRYPT_CPU_FEATURES   feature;
+    } rgProbe[] = {
+        { "hw.optional.arm.FEAT_AES",    SYMCRYPT_CPU_FEATURE_NEON_AES },
+        { "hw.optional.arm.FEAT_PMULL",  SYMCRYPT_CPU_FEATURE_NEON_PMULL },
+        { "hw.optional.arm.FEAT_SHA256", SYMCRYPT_CPU_FEATURE_NEON_SHA256 },
+    };
+
+    // Arm64 code relies on ASIMD being available everywhere (always present with Armv8) and
+    // the compiler may emit ASIMD instructions anywhere, so the NEON bit starts out cleared
+    // and is never probed.
+    SYMCRYPT_CPU_FEATURES notPresent = ~SYMCRYPT_CPU_FEATURE_NEON;
+    size_t i;
+
+    for( i = 0; i < sizeof( rgProbe ) / sizeof( rgProbe[0] ); i++ )
+    {
+        // Value and length are re-initialised before every query.
+        uint32_t fPresent = 0;
+        size_t cbValue = sizeof(fPresent);
+
+        if( sysctlbyname(rgProbe[i].szSysctlName, &fPresent, &cbValue, NULL, 0) == 0 && fPresent )
+        {
+            notPresent &= ~rgProbe[i].feature;
+        }
+    }
+
+    g_SymCryptCpuFeaturesNotPresent = notPresent;
+}
+
+#else // Linux and other Unix platforms
+
+#include <sys/auxv.h>
+
+// #include <asm/hwcap.h>
+#define HWCAP_AES   (1 << 3)
+#define HWCAP_PMULL (1 << 4)
+#define HWCAP_SHA2  (1 << 6)
+
+//
+// Linux / other Unix Arm64 variant.
+// The crypto extensions are read from the AT_HWCAP auxiliary vector entry; every
+// extension whose hwcap bit is clear is put back into the "not present" mask.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptDetectCpuFeaturesFromIsProcessorFeaturePresent(void)
+{
+    const unsigned long hwcaps = getauxval( AT_HWCAP );
+
+    // hwcap bit -> feature bit that is marked not present when the hwcap bit is clear
+    const struct {
+        unsigned long           hwcapBit;
+        SYMCRYPT_CPU_FEATURES   feature;
+    } rgProbe[] = {
+        { HWCAP_AES,   SYMCRYPT_CPU_FEATURE_NEON_AES },
+        { HWCAP_PMULL, SYMCRYPT_CPU_FEATURE_NEON_PMULL },
+        { HWCAP_SHA2,  SYMCRYPT_CPU_FEATURE_NEON_SHA256 },
+    };
+
+    // Start from "NEON and all three crypto extensions present".
+    // Arm64 code relies on ASIMD being available everywhere (always present with Armv8) and
+    // the compiler may emit ASIMD instructions anywhere, so the NEON bit is never probed.
+    SYMCRYPT_CPU_FEATURES notPresent = ~(
+        SYMCRYPT_CPU_FEATURE_NEON |
+        SYMCRYPT_CPU_FEATURE_NEON_AES |
+        SYMCRYPT_CPU_FEATURE_NEON_PMULL |
+        SYMCRYPT_CPU_FEATURE_NEON_SHA256
+        );
+    size_t i;
+
+    for( i = 0; i < sizeof( rgProbe ) / sizeof( rgProbe[0] ); i++ )
+    {
+        if( (hwcaps & rgProbe[i].hwcapBit) == 0 )
+        {
+            notPresent |= rgProbe[i].feature;
+        }
+    }
+
+    g_SymCryptCpuFeaturesNotPresent = notPresent;
+}
+
+#endif // SYMCRYPT_PLATFORM_APPLE
+
+#endif
diff --git a/libs/symcrypt/lib/crt.c b/libs/symcrypt/lib/crt.c
new file mode 100644
index 00000000000..85148646b7f
--- /dev/null
+++ b/libs/symcrypt/lib/crt.c
@@ -0,0 +1,215 @@
+//
+// crt.c   Chinese Remainder Theorem Algorithms
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+
+//
+// SymCryptCrtGenerateForTwoCoprimes
+//
+// Produces both CRT inverses for the pair (P, Q) from a single extended-GCD call.
+// Going by the local names, peInvQModP receives Q^{-1} mod P and peInvPModQ receives
+// P^{-1} mod Q. The extended GCD requires an odd second operand, so when P is odd the
+// operands are passed as (Q, P), otherwise as (P, Q); the results are mapped back to
+// the outputs accordingly.
+//
+// Returns SYMCRYPT_INVALID_ARGUMENT, leaving both outputs unwritten, when the operand
+// chosen as first GCD argument is zero or the one chosen as second argument is even.
+// The scratch size (two integers of the larger digit size plus the extended-GCD
+// scratch) is only checked through SYMCRYPT_ASSERT.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptCrtGenerateForTwoCoprimes(
+    _In_                            PCSYMCRYPT_MODULUS      pmP,
+    _In_                            PCSYMCRYPT_MODULUS      pmQ,
+                                    UINT32                  flags,
+    _Out_                           PSYMCRYPT_MODELEMENT    peInvQModP,
+    _Out_                           PSYMCRYPT_MODELEMENT    peInvPModQ,
+    _Out_writes_bytes_( cbScratch )
+                                    PBYTE                   pbScratch,
+                                    SIZE_T                  cbScratch )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    PCSYMCRYPT_INT piSrc1 = NULL;
+    PCSYMCRYPT_INT piSrc2 = NULL;
+
+    PSYMCRYPT_INT piInvSrc1ModSrc2 = NULL;
+    PSYMCRYPT_INT piInvSrc2ModSrc1 = NULL;
+
+    UINT32 nDigits = 0;
+    UINT32 cbInt = 0;
+
+    BOOLEAN oddP = FALSE;
+
+    SYMCRYPT_ASSERT( pmP != NULL );
+    SYMCRYPT_ASSERT( pmQ != NULL );
+
+    nDigits = SYMCRYPT_MAX( SymCryptModulusDigitsizeOfObject( pmP ), SymCryptModulusDigitsizeOfObject( pmQ ));
+
+    // Create two temporary integers
+    cbInt = SymCryptSizeofIntFromDigits( nDigits );
+
+    SYMCRYPT_ASSERT( cbScratch >= 2*cbInt + SYMCRYPT_SCRATCH_BYTES_FOR_EXTENDED_GCD( nDigits ));
+
+    // Both temporaries are carved off the front of the scratch buffer
+    piInvSrc1ModSrc2 = SymCryptIntCreate( pbScratch, cbInt, nDigits ); pbScratch += cbInt; cbScratch -= cbInt;
+    piInvSrc2ModSrc1 = SymCryptIntCreate( pbScratch, cbInt, nDigits ); pbScratch += cbInt; cbScratch -= cbInt;
+
+    // Pick the operand order so that the second GCD argument is the odd modulus when P is odd
+    oddP = ((SymCryptIntGetValueLsbits32(SymCryptIntFromModulus( (PSYMCRYPT_MODULUS) pmP )) & 1) == 1);
+    if (oddP)
+    {
+        piSrc1 = SymCryptIntFromModulus( (PSYMCRYPT_MODULUS) pmQ );
+        piSrc2 = SymCryptIntFromModulus( (PSYMCRYPT_MODULUS) pmP );
+    }
+    else
+    {
+        piSrc1 = SymCryptIntFromModulus( (PSYMCRYPT_MODULUS) pmP );
+        piSrc2 = SymCryptIntFromModulus( (PSYMCRYPT_MODULUS) pmQ );
+    }
+
+    // IntExtendedGcd requirements:
+    //      - First argument > 0
+    //      - Second argument odd
+    if( SymCryptIntIsEqualUint32(piSrc1, 0) ||
+        ((SymCryptIntGetValueLsbits32(piSrc2) & 1) != 1) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Extended GCD
+    SymCryptIntExtendedGcd( piSrc1, piSrc2, flags, NULL, NULL, piInvSrc1ModSrc2, piInvSrc2ModSrc1, pbScratch, cbScratch );
+
+    // Undo the operand swap when storing the results
+    if (oddP)
+    {
+        SymCryptIntToModElement( piInvSrc2ModSrc1, pmQ, peInvPModQ, pbScratch, cbScratch );
+        SymCryptIntToModElement( piInvSrc1ModSrc2, pmP, peInvQModP, pbScratch, cbScratch );
+    }
+    else
+    {
+        SymCryptIntToModElement( piInvSrc2ModSrc1, pmP, peInvQModP, pbScratch, cbScratch );
+        SymCryptIntToModElement( piInvSrc1ModSrc2, pmQ, peInvPModQ, pbScratch, cbScratch );
+    }
+
+cleanup:
+    return scError;
+}
+
+
+//
+// SymCryptCrtGenerateInverses
+//
+// Only nCoprimes == 2 is implemented; any other count returns SYMCRYPT_INVALID_ARGUMENT.
+// For two moduli the work is delegated to SymCryptCrtGenerateForTwoCoprimes and its
+// status is returned, so a rejected pair of moduli (which leaves ppeCrtInverses[0] and
+// ppeCrtInverses[1] unwritten) is reported to the caller instead of being masked as
+// SYMCRYPT_NO_ERROR.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptCrtGenerateInverses(
+                                    UINT32                  nCoprimes,
+    _In_reads_( nCoprimes )         PCSYMCRYPT_MODULUS *    ppmCoprimes,
+                                    UINT32                  flags,
+    _Out_writes_( nCoprimes )       PSYMCRYPT_MODELEMENT *  ppeCrtInverses,
+    _Out_writes_bytes_( cbScratch ) PBYTE                   pbScratch,
+                                    SIZE_T                  cbScratch )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    if (nCoprimes == 2)
+    {
+        // Propagate the helper's status; it can fail with SYMCRYPT_INVALID_ARGUMENT
+        scError = SymCryptCrtGenerateForTwoCoprimes(
+                        ppmCoprimes[0],
+                        ppmCoprimes[1],
+                        flags,
+                        ppeCrtInverses[0],
+                        ppeCrtInverses[1],
+                        pbScratch,
+                        cbScratch );
+    }
+    else
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+cleanup:
+    return scError;
+}
+
+//
+// SymCryptCrtSolve
+//
+// Combines two remainders r0 (mod p = ppmCoprimes[0]) and r1 (mod q = ppmCoprimes[1])
+// into piSolution = (q^{-1}*(r0 - r1) mod p)*q + r1, using ppeCrtInverses[0] as
+// q^{-1} mod p. The flags parameter is unused.
+//
+// Returns SYMCRYPT_INVALID_ARGUMENT when nCoprimes != 2, when an integer of twice the
+// larger digit size cannot be represented, or when the final addition produces a
+// carry. An error from SymCryptIntCopyMixedSize is passed through unchanged.
+// The scratch size is only checked through SYMCRYPT_ASSERT.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptCrtSolve(
+                                    UINT32                  nCoprimes,
+    _In_reads_( nCoprimes )         PCSYMCRYPT_MODULUS *    ppmCoprimes,
+    _In_reads_( nCoprimes )         PCSYMCRYPT_MODELEMENT * ppeCrtInverses,
+    _In_reads_( nCoprimes )         PCSYMCRYPT_MODELEMENT * ppeCrtRemainders,
+                                    UINT32                  flags,
+    _Out_                           PSYMCRYPT_INT           piSolution,
+    _Out_writes_bytes_( cbScratch ) PBYTE                   pbScratch,
+                                    SIZE_T                  cbScratch )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    SYMCRYPT_ASSERT( nCoprimes >= 2 );
+
+    PSYMCRYPT_INT piTmp = NULL;
+    PSYMCRYPT_MODELEMENT peTmp = NULL;
+
+    PSYMCRYPT_INT piDouble = NULL;
+
+    UINT32 nDigitsMax = 0;
+
+    UINT32 cbInt = 0;
+    UINT32 cbModElement = 0;
+    UINT32 cbDouble = 0;
+
+    UINT32 carry = 0;
+
+    UNREFERENCED_PARAMETER( flags );
+
+    if (nCoprimes != 2)
+    {
+        // Only two moduli are supported. Reject the call before ppmCoprimes[0] and
+        // ppmCoprimes[1] are dereferenced, so an array shorter than two entries is
+        // never read when the assertion above is compiled out.
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    nDigitsMax = SYMCRYPT_MAX( SymCryptModulusDigitsizeOfObject( ppmCoprimes[0] ), SymCryptModulusDigitsizeOfObject( ppmCoprimes[1] ) );
+
+    cbInt = SymCryptSizeofIntFromDigits( nDigitsMax );
+    cbModElement = SymCryptSizeofModElementFromModulus( ppmCoprimes[0] );
+    cbDouble = SymCryptSizeofIntFromDigits( 2*nDigitsMax );
+
+    if( cbDouble == 0 )
+    {
+        // It is possible that cbDouble would not fit within the maximum integer
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    SYMCRYPT_ASSERT( cbScratch >= cbInt + cbModElement + cbDouble +
+                                  SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigitsMax ),
+                                       SYMCRYPT_SCRATCH_BYTES_FOR_INT_MUL( 2*nDigitsMax ) )
+                   );
+
+    // Create temporaries
+    piTmp = SymCryptIntCreate( pbScratch, cbInt, nDigitsMax ); pbScratch += cbInt; cbScratch -= cbInt;
+
+    peTmp = SymCryptModElementCreate( pbScratch, cbModElement, ppmCoprimes[0] ); pbScratch += cbModElement; cbScratch -= cbModElement;
+
+    piDouble = SymCryptIntCreate( pbScratch, cbDouble, 2*nDigitsMax ); pbScratch += cbDouble; cbScratch -= cbDouble;
+
+    //
+    // Let r0 and r1 be the two remainders modulo p and q respectively
+    // Then we calculate (q^{-1}(r0 - r1) mod p)*q + r1
+    //
+    SymCryptModElementToInt( ppmCoprimes[1], ppeCrtRemainders[1], piTmp, pbScratch, cbScratch );     // Convert r1 to Int
+    SymCryptIntToModElement( piTmp, ppmCoprimes[0], peTmp, pbScratch, cbScratch );                   // Convert it to r1 mod p
+
+    SymCryptModSub( ppmCoprimes[0], ppeCrtRemainders[0], peTmp, peTmp, pbScratch, cbScratch );       // (r0 - r1) mod p
+    SymCryptModMul( ppmCoprimes[0], ppeCrtInverses[0], peTmp, peTmp, pbScratch, cbScratch );         // q^{-1}*(r0 - r1) mod p
+    SymCryptModElementToInt( ppmCoprimes[0], peTmp, piTmp, pbScratch, cbScratch );                   // Convert it to integer
+
+    SymCryptIntMulMixedSize( piTmp, SymCryptIntFromModulus((PSYMCRYPT_MODULUS)ppmCoprimes[1]), piDouble, pbScratch, cbScratch );  // Multiply by q
+    scError = SymCryptIntCopyMixedSize( piDouble, piSolution );                                      // Copy it into the solution
+    if (scError != SYMCRYPT_NO_ERROR)
+    {
+        goto cleanup;
+    }
+
+    SymCryptModElementToInt( ppmCoprimes[1], ppeCrtRemainders[1], piTmp, pbScratch, cbScratch );     // Convert r1 to integer
+
+    carry = SymCryptIntAddMixedSize( piTmp, piSolution, piSolution );                                // Add it to the solution
+
+    if (carry>0)
+    {
+        // The sum does not fit in piSolution
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+cleanup:
+    return scError;
+}
diff --git a/libs/symcrypt/lib/cshake_pattern.c b/libs/symcrypt/lib/cshake_pattern.c
new file mode 100644
index 00000000000..89e346ef7dd
--- /dev/null
+++ b/libs/symcrypt/lib/cshake_pattern.c
@@ -0,0 +1,152 @@
+//
+// cshake_pattern.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#if 0
+#pragma makedep header
+#endif
+
+//
+// This source file implements cSHAKE128 and cSHAKE256
+//
+// See the symcrypt.h file for documentation on what the various functions do.
+//
+
+
+//
+// SymCryptCShake
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_Xxx(
+    _In_reads_( cbFunctionNameString )  PCBYTE  pbFunctionNameString,
+                                        SIZE_T  cbFunctionNameString,
+    _In_reads_( cbCustomizationString ) PCBYTE  pbCustomizationString,
+                                        SIZE_T  cbCustomizationString,
+    _In_reads_( cbData )                PCBYTE  pbData,
+                                        SIZE_T  cbData,
+    _Out_writes_( cbResult )            PBYTE   pbResult,
+                                        SIZE_T  cbResult)
+{
+    SYMCRYPT_XXX_STATE state;
+
+    SYMCRYPT_XxxInit(&state,
+                        pbFunctionNameString, cbFunctionNameString,
+                        pbCustomizationString, cbCustomizationString);
+
+    SYMCRYPT_XxxAppend(&state, pbData, cbData);
+    SYMCRYPT_XxxExtract(&state, pbResult, cbResult, TRUE);
+}
+
+
+//
+// SymCryptCShakeInit
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxInit(
+    _Out_                               PSYMCRYPT_XXX_STATE pState,
+    _In_reads_( cbFunctionNameString )  PCBYTE              pbFunctionNameString,
+                                        SIZE_T              cbFunctionNameString,
+    _In_reads_( cbCustomizationString ) PCBYTE              pbCustomizationString,
+                                        SIZE_T              cbCustomizationString)
+{
+    C_ASSERT( sizeof(SYMCRYPT_XXX_STATE) == sizeof(SYMCRYPT_SHAKEXXX_STATE) );
+
+    SYMCRYPT_SHAKEXXX_INIT( (SYMCRYPT_SHAKEXXX_STATE*)pState );
+
+    // Perform cSHAKE processing of input strings when any of the input strings is non-empty
+    if (cbFunctionNameString != 0 || cbCustomizationString != 0)
+    {
+        // cSHAKE and SHAKE have different paddings. pState->ks.paddingValue
+        // is set to SYMCRYPT_SHAKE_PADDING_VALUE in the SHAKE initialization above.
+        // We update the padding value here because at least one of the input strings
+        // is non-empty and cSHAKE will not default to SHAKE.
+        pState->ks.paddingValue = SYMCRYPT_CSHAKE_PADDING_VALUE;
+
+        SymCryptCShakeEncodeInputStrings(&pState->ks,
+                                            pbFunctionNameString, cbFunctionNameString,
+                                            pbCustomizationString, cbCustomizationString);
+    }
+
+    SYMCRYPT_SET_MAGIC(pState);
+}
+
+//
+// SymCryptCShakeAppend
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxAppend(
+    _Inout_                 PSYMCRYPT_XXX_STATE pState,
+    _In_reads_( cbData )    PCBYTE              pbData,
+                            SIZE_T              cbData )
+{
+    // Fixing of the padding value
+    //
+    // SymCryptKeccakAppend will reset the state, switch to absorb mode,
+    // and append data to the empty state if the state was in squeeze mode
+    // when Append is called. This behavior is equivalent to initializing
+    // cSHAKE with empty input strings, which makes cSHAKE a SHAKE instance.
+    //
+    // cSHAKE and SHAKE have different paddings, so we have to update the
+    // padding value in case it was cSHAKE padding before.
+    if (pState->ks.squeezeMode)
+    {
+        pState->ks.paddingValue = SYMCRYPT_SHAKE_PADDING_VALUE;
+    }
+
+    SymCryptKeccakAppend(&pState->ks, pbData, cbData);
+}
+
+//
+// SymCryptCShakeExtract
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxExtract(
+    _Inout_                 PSYMCRYPT_XXX_STATE pState,
+    _Out_writes_(cbResult)  PBYTE               pbResult,
+                            SIZE_T              cbResult,
+                            BOOLEAN             bWipe)
+{
+    SymCryptKeccakExtract(&pState->ks, pbResult, cbResult, bWipe);
+
+    if (bWipe)
+    {
+        // If the state was wiped, set the state as if cSHAKE was initialized
+        // with empty strings, which is equivalent to empty SHAKE state.
+        // We have no way to store the Function Name string and Customization
+        // string information to go back to the initial cSHAKE state.
+        pState->ks.paddingValue = SYMCRYPT_SHAKE_PADDING_VALUE;
+    }
+}
+
+//
+// SymCryptCShakeResult
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxResult(
+    _Inout_                                         PSYMCRYPT_XXX_STATE pState,
+    _Out_writes_( SYMCRYPT_CSHAKEXXX_RESULT_SIZE )  PBYTE               pbResult)
+{
+    SymCryptKeccakExtract(&pState->ks, pbResult, SYMCRYPT_CSHAKEXXX_RESULT_SIZE, TRUE);
+
+    // Revert to cSHAKE initialized with empty strings state, i.e., empty SHAKE state
+    pState->ks.paddingValue = SYMCRYPT_SHAKE_PADDING_VALUE;
+}
+
+//
+// SymCryptCShakeStateCopy
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxStateCopy(_In_ const SYMCRYPT_XXX_STATE* pSrc, _Out_ SYMCRYPT_XXX_STATE* pDst)
+{
+    SYMCRYPT_CHECK_MAGIC(pSrc);
+    *pDst = *pSrc;
+    SYMCRYPT_SET_MAGIC(pDst);
+}
diff --git a/libs/symcrypt/lib/desx.c b/libs/symcrypt/lib/desx.c
new file mode 100644
index 00000000000..9bfbb0d23f6
--- /dev/null
+++ b/libs/symcrypt/lib/desx.c
@@ -0,0 +1,131 @@
+//
+// DesX.c   DESX implementation
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+
+#include "precomp.h"
+
+
+// Block cipher descriptor for DESX. Only the key expansion and the single-block
+// encrypt/decrypt entries are filled in; every mode-specific entry is NULL.
+// The initializer is positional, so the order must match SYMCRYPT_BLOCKCIPHER.
+const SYMCRYPT_BLOCKCIPHER SymCryptDesxBlockCipher_default = {
+    SymCryptDesxExpandKey,  // PSYMCRYPT_BLOCKCIPHER_EXPAND_KEY    expandKeyFunc;
+    SymCryptDesxEncrypt,    // PSYMCRYPT_BLOCKCIPHER_CRYPT         encryptFunc;
+    SymCryptDesxDecrypt,    // PSYMCRYPT_BLOCKCIPHER_CRYPT         decryptFunc;
+    NULL,                   // PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB     ecbEncryptFunc;
+    NULL,                   // PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB     ecbDecryptFunc;
+    NULL,                   // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE    cbcEncryptFunc;
+    NULL,                   // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE    cbcDecryptFunc;
+    NULL,                   // PSYMCRYPT_BLOCKCIPHER_MAC_MODE      cbcMacFunc;
+    NULL,                   // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE    ctrMsbFunc;
+    NULL,                   // PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmEncryptPartFunc;
+    NULL,                   // PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmDecryptPartFunc;
+    8,                      // SIZE_T                              blockSize;
+    sizeof( SYMCRYPT_DESX_EXPANDED_KEY ), // SIZE_T                expandedKeySize;    // = sizeof( SYMCRYPT_XXX_EXPANDED_KEY )
+};
+
+const PCSYMCRYPT_BLOCKCIPHER SymCryptDesxBlockCipher = &SymCryptDesxBlockCipher_default;
+
+//
+// SymCryptDesxExpandKey
+//
+// Accepts exactly 24 key bytes and returns SYMCRYPT_WRONG_KEY_SIZE otherwise.
+// Bytes 0..7 are expanded as the DES key, bytes 8..15 are stored as the input
+// whitening value and bytes 16..23 as the output whitening value.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptDesxExpandKey(  _Out_               PSYMCRYPT_DESX_EXPANDED_KEY pExpandedKey,
+                        _In_reads_(cbKey)   PCBYTE                      pbKey,
+                                            SIZE_T                      cbKey )
+{
+    if( cbKey != 24 )
+    {
+        return SYMCRYPT_WRONG_KEY_SIZE;
+    }
+
+    // NOTE(review): the status returned by SymCryptDesExpandKey is not checked here;
+    // presumably it cannot fail for a fixed 8-byte key - confirm.
+    SymCryptDesExpandKey( &pExpandedKey->desKey, pbKey, 8 );
+    memcpy( pExpandedKey->inputWhitening, pbKey+8, 8 );
+    memcpy( pExpandedKey->outputWhitening, pbKey+16, 8 );
+
+    return SYMCRYPT_NO_ERROR;
+}
+
+//
+// SymCryptDesxEncrypt
+//
+// Encrypts one 8-byte block: XOR with the input whitening value, DES-encrypt,
+// XOR with the output whitening value. The local working buffer is wiped
+// before returning.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptDesxEncrypt(
+    _In_                                        PCSYMCRYPT_DESX_EXPANDED_KEY    pExpandedKey,
+    _In_reads_( SYMCRYPT_DESX_BLOCK_SIZE )      PCBYTE                          pbSrc,
+    _Out_writes_( SYMCRYPT_DESX_BLOCK_SIZE )    PBYTE                           pbDst )
+{
+    SYMCRYPT_ALIGN BYTE buf[8];
+
+    //
+    // We buffer the result locally to obey the read once/write once rule.
+    //
+    SymCryptXorBytes( pbSrc, pExpandedKey->inputWhitening, buf, 8 );
+    SymCryptDesEncrypt( &pExpandedKey->desKey, buf, buf );
+    SymCryptXorBytes( buf, pExpandedKey->outputWhitening, pbDst, 8 );
+
+    SymCryptWipeKnownSize( buf, sizeof( buf ) );
+}
+
+//
+// SymCryptDesxDecrypt
+//
+// Inverse of SymCryptDesxEncrypt for one 8-byte block: XOR with the output
+// whitening value, DES-decrypt, XOR with the input whitening value. The local
+// working buffer is wiped before returning.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptDesxDecrypt(
+    _In_                                        PCSYMCRYPT_DESX_EXPANDED_KEY    pExpandedKey,
+    _In_reads_( SYMCRYPT_DESX_BLOCK_SIZE )      PCBYTE                          pbSrc,
+    _Out_writes_( SYMCRYPT_DESX_BLOCK_SIZE )    PBYTE                           pbDst )
+{
+    SYMCRYPT_ALIGN BYTE buf[8];
+
+    //
+    // We buffer the result locally to obey the read once/write once rule.
+    //
+    SymCryptXorBytes( pbSrc, pExpandedKey->outputWhitening, buf, 8 );
+    SymCryptDesDecrypt( &pExpandedKey->desKey, buf, buf );
+    SymCryptXorBytes( buf, pExpandedKey->inputWhitening, pbDst, 8 );
+
+    SymCryptWipeKnownSize( buf, sizeof( buf ) );
+}
+
+
+// Known-answer vectors used by SymCryptDesxSelftest
+static const BYTE desxKnownKey[24] = {
+    0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+    0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff,
+    0x01, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18
+};
+
+static const BYTE desxKnownPlaintext[] = {
+    0xd9, 0xb6, 0xa1, 0x4e, 0xe6, 0x71, 0x4e, 0x17
+};
+
+static const BYTE desxKnownCiphertext[] = {
+    0x66, 0x77, 0x1f, 0x2a, 0x0c, 0x05, 0x01, 0xca
+};
+
+
+//
+// SymCryptDesxSelftest
+//
+// Known-answer test: expands the fixed key, encrypts the known plaintext and
+// decrypts the known ciphertext, calling SymCryptFatal with a distinct code
+// ('desx', 'desy', 'desz') for whichever step does not match.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptDesxSelftest(void)
+{
+    SYMCRYPT_DESX_EXPANDED_KEY key;
+    BYTE buf[SYMCRYPT_DESX_BLOCK_SIZE];
+
+    if( SymCryptDesxExpandKey( &key, desxKnownKey, sizeof( desxKnownKey )) != SYMCRYPT_NO_ERROR )
+    {
+        SymCryptFatal( 'desx' );
+    }
+
+    SymCryptDesxEncrypt( &key, desxKnownPlaintext, buf );
+
+    // NOTE(review): presumably a test hook that may corrupt buf to exercise the
+    // failure path - confirm against the definition of SymCryptInjectError.
+    SymCryptInjectError( buf, SYMCRYPT_DESX_BLOCK_SIZE );
+
+    if( memcmp( buf, desxKnownCiphertext, SYMCRYPT_DESX_BLOCK_SIZE ) != 0 )
+    {
+        SymCryptFatal( 'desy' );
+    }
+
+    SymCryptDesxDecrypt( &key, desxKnownCiphertext, buf );
+
+    SymCryptInjectError( buf, SYMCRYPT_DESX_BLOCK_SIZE );
+
+    if( memcmp( buf, desxKnownPlaintext, SYMCRYPT_DESX_BLOCK_SIZE ) != 0 )
+    {
+        SymCryptFatal( 'desz' );
+    }
+
+}
diff --git a/libs/symcrypt/lib/dh.c b/libs/symcrypt/lib/dh.c new
file mode 100644
index 00000000000..9f2a3796aa7
--- /dev/null
+++ b/libs/symcrypt/lib/dh.c
@@ -0,0 +1,141 @@
+//
+// dh.c   DH functions
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+//
+
+#include "precomp.h"
+
+//
+// SymCryptDhSecretAgreement
+//
+// Computes (public key)^(private key) mod P for two keys of the same DL group and
+// writes the result to pbAgreedSecret in the requested number format.
+//
+// Returns:
+//  SYMCRYPT_INVALID_ARGUMENT           either key lacks SYMCRYPT_FLAG_DLKEY_DH, flags != 0,
+//                                      pkPrivate holds no private key, or the groups differ
+//  SYMCRYPT_WRONG_BLOCK_SIZE           cbAgreedSecret != SymCryptDlkeySizeofPublicKey( pkPrivate )
+//  SYMCRYPT_MEMORY_ALLOCATION_FAILURE  the scratch buffer could not be allocated
+//  SYMCRYPT_INVALID_BLOB               the computed secret is zero
+//  otherwise the status of SymCryptModElementGetValue.
+//
+// The scratch buffer is allocated internally and is wiped and freed on every exit
+// path once it has been allocated.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptDhSecretAgreement(
+    _In_                            PCSYMCRYPT_DLKEY        pkPrivate,
+    _In_                            PCSYMCRYPT_DLKEY        pkPublic,
+                                    SYMCRYPT_NUMBER_FORMAT  format,
+                                    UINT32                  flags,
+    _Out_writes_( cbAgreedSecret )  PBYTE                   pbAgreedSecret,
+                                    SIZE_T                  cbAgreedSecret )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    PBYTE   pbScratch = NULL;
+    SIZE_T  cbScratch = 0;
+    PBYTE   pbScratchInternal = NULL;
+    SIZE_T  cbScratchInternal = 0;
+
+    PCSYMCRYPT_DLGROUP pDlgroup = NULL;
+
+    PSYMCRYPT_MODELEMENT peRes = NULL;
+    UINT32 cbModelement = 0;
+
+    UINT32 nBitsOfExp = 0;
+
+    // Make sure that the keys may be used in DH
+    if ( ((pkPrivate->fAlgorithmInfo & SYMCRYPT_FLAG_DLKEY_DH) == 0) ||
+         ((pkPublic->fAlgorithmInfo & SYMCRYPT_FLAG_DLKEY_DH) == 0) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Make sure we only specify the correct flags and that
+    // there is a private key
+    if ( (flags != 0) || (!pkPrivate->fHasPrivateKey) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Check that the group is the same for both keys
+    if ( SymCryptDlgroupIsSame( pkPrivate->pDlgroup, pkPublic->pDlgroup ) )
+    {
+        pDlgroup = pkPrivate->pDlgroup;
+    }
+    else
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Check the output buffer has the correct size
+    if (cbAgreedSecret != SymCryptDlkeySizeofPublicKey( pkPrivate ))
+    {
+        scError = SYMCRYPT_WRONG_BLOCK_SIZE;
+        goto cleanup;
+    }
+
+    // Objects and scratch space size calculation
+    // (one mod element for the result, followed by the working scratch)
+    cbModelement = SymCryptSizeofModElementFromModulus( pDlgroup->pmP );
+    cbScratch = cbModelement +
+                SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( pDlgroup->nDigitsOfP ),
+                     SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pDlgroup->nDigitsOfP ));
+
+    // Scratch space allocation
+    pbScratch = SymCryptCallbackAlloc( cbScratch );
+    if ( pbScratch == NULL )
+    {
+        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
+        goto cleanup;
+    }
+
+    // Creating temporary
+    // pbScratch/cbScratch keep the full allocation for the wipe and free in cleanup;
+    // the *Internal pair tracks what is left after carving off peRes.
+    pbScratchInternal = pbScratch;
+    cbScratchInternal = cbScratch;
+    peRes = SymCryptModElementCreate( pbScratchInternal, cbModelement, pDlgroup->pmP );
+    pbScratchInternal += cbModelement;
+    cbScratchInternal -= cbModelement;
+
+    SYMCRYPT_ASSERT( peRes != NULL);
+
+    // Fix the bits of the exponent (the private key might be either mod Q, mod 2^nBitsPriv, or mod P)
+    if (pkPrivate->fPrivateModQ)
+    {
+        nBitsOfExp = pkPrivate->nBitsPriv;
+    }
+    else
+    {
+        nBitsOfExp = pDlgroup->nBitsOfP;
+    }
+
+    // Calculate the secret
+    SymCryptModExp(
+            pDlgroup->pmP,
+            pkPublic->pePublicKey,
+            pkPrivate->piPrivateKey,
+            nBitsOfExp,
+            0,      // SC safe
+            peRes,
+            pbScratchInternal,
+            cbScratchInternal );
+
+    // Check if the result is zero
+    if ( SymCryptModElementIsZero( pDlgroup->pmP, peRes ) )
+    {
+        scError = SYMCRYPT_INVALID_BLOB;
+        goto cleanup;
+    }
+
+    // Output the result
+    scError = SymCryptModElementGetValue(
+                    pDlgroup->pmP,
+                    peRes,
+                    pbAgreedSecret,
+                    cbAgreedSecret,
+                    format,
+                    pbScratchInternal,
+                    cbScratchInternal );
+    if ( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+cleanup:
+    // The scratch holds the computed secret (peRes), so wipe it before freeing
+    if ( pbScratch != NULL )
+    {
+        SymCryptWipe( pbScratch, cbScratch );
+        SymCryptCallbackFree( pbScratch );
+    }
+
+    return scError;
+}
diff --git a/libs/symcrypt/lib/dl_internal_groups.c b/libs/symcrypt/lib/dl_internal_groups.c
new file mode 100644
index 00000000000..3476407c5ca
--- /dev/null
+++ b/libs/symcrypt/lib/dl_internal_groups.c
@@ -0,0 +1,922 @@
+//
+// dl_internal_groups.c   Parameters for internally supported dl groups.
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+//
+
+#include "precomp.h"
+
+// Do not delete the following preprocessor directive.
+// It is used for folding the parameters.
+#if 1 + +/*********************************** + * * + * IKE GROUPS (RFC 3526) * + * * + ***********************************/ + +static const BYTE rgbIke3526Modp2048[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34, + 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1, + 0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, + 0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22, + 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD, + 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, + 0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37, + 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45, + 0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, + 0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B, + 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED, + 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, + 0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6, + 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D, + 0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05, + 0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A, + 0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F, + 0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96, + 0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB, + 0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D, + 0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04, + 0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C, + 0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B, + 0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03, + 0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F, + 0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9, + 0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18, + 0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5, + 0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10, + 0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAC, 0xAA, 0x68, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const BYTE rgbIke3526Modp3072[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34, + 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1, + 0x29, 0x02, 
0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, + 0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22, + 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD, + 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, + 0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37, + 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45, + 0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, + 0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B, + 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED, + 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, + 0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6, + 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D, + 0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05, + 0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A, + 0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F, + 0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96, + 0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB, + 0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D, + 0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04, + 0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C, + 0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B, + 0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03, + 0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F, + 0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9, + 0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18, + 0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5, + 0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10, + 0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D, + 0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 0x33, + 0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64, + 0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A, + 0x8A, 0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D, + 0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7, + 0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7, + 0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D, + 0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B, + 0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64, + 0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64, + 0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C, + 0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C, + 0x77, 0x09, 
0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2, + 0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31, + 0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E, + 0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x3A, 0xD2, 0xCA, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const BYTE rgbIke3526Modp4096[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34, + 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1, + 0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, + 0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22, + 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD, + 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, + 0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37, + 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45, + 0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, + 0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B, + 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED, + 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, + 0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6, + 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D, + 0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05, + 0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A, + 0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F, + 0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96, + 0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB, + 0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D, + 0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04, + 0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C, + 0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B, + 0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03, + 0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F, + 0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9, + 0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18, + 0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5, + 0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10, + 0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D, + 0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 0x33, + 0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64, + 0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A, + 0x8A, 
0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D, + 0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7, + 0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7, + 0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D, + 0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B, + 0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64, + 0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64, + 0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C, + 0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C, + 0x77, 0x09, 0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2, + 0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31, + 0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E, + 0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x21, 0x08, 0x01, + 0x1A, 0x72, 0x3C, 0x12, 0xA7, 0x87, 0xE6, 0xD7, + 0x88, 0x71, 0x9A, 0x10, 0xBD, 0xBA, 0x5B, 0x26, + 0x99, 0xC3, 0x27, 0x18, 0x6A, 0xF4, 0xE2, 0x3C, + 0x1A, 0x94, 0x68, 0x34, 0xB6, 0x15, 0x0B, 0xDA, + 0x25, 0x83, 0xE9, 0xCA, 0x2A, 0xD4, 0x4C, 0xE8, + 0xDB, 0xBB, 0xC2, 0xDB, 0x04, 0xDE, 0x8E, 0xF9, + 0x2E, 0x8E, 0xFC, 0x14, 0x1F, 0xBE, 0xCA, 0xA6, + 0x28, 0x7C, 0x59, 0x47, 0x4E, 0x6B, 0xC0, 0x5D, + 0x99, 0xB2, 0x96, 0x4F, 0xA0, 0x90, 0xC3, 0xA2, + 0x23, 0x3B, 0xA1, 0x86, 0x51, 0x5B, 0xE7, 0xED, + 0x1F, 0x61, 0x29, 0x70, 0xCE, 0xE2, 0xD7, 0xAF, + 0xB8, 0x1B, 0xDD, 0x76, 0x21, 0x70, 0x48, 0x1C, + 0xD0, 0x06, 0x91, 0x27, 0xD5, 0xB0, 0x5A, 0xA9, + 0x93, 0xB4, 0xEA, 0x98, 0x8D, 0x8F, 0xDD, 0xC1, + 0x86, 0xFF, 0xB7, 0xDC, 0x90, 0xA6, 0xC0, 0x8F, + 0x4D, 0xF4, 0x35, 0xC9, 0x34, 0x06, 0x31, 0x99, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const BYTE rgbIke3526Modp6144[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34, + 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1, + 0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, + 0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22, + 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD, + 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, + 0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37, + 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45, + 
0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, + 0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B, + 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED, + 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, + 0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6, + 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D, + 0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05, + 0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A, + 0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F, + 0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96, + 0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB, + 0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D, + 0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04, + 0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C, + 0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B, + 0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03, + 0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F, + 0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9, + 0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18, + 0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5, + 0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10, + 0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D, + 0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 0x33, + 0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64, + 0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A, + 0x8A, 0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D, + 0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7, + 0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7, + 0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D, + 0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B, + 0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64, + 0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64, + 0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C, + 0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C, + 0x77, 0x09, 0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2, + 0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31, + 0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E, + 0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x21, 0x08, 0x01, + 0x1A, 0x72, 0x3C, 0x12, 0xA7, 0x87, 0xE6, 0xD7, + 0x88, 0x71, 0x9A, 0x10, 0xBD, 0xBA, 0x5B, 0x26, + 
0x99, 0xC3, 0x27, 0x18, 0x6A, 0xF4, 0xE2, 0x3C, + 0x1A, 0x94, 0x68, 0x34, 0xB6, 0x15, 0x0B, 0xDA, + 0x25, 0x83, 0xE9, 0xCA, 0x2A, 0xD4, 0x4C, 0xE8, + 0xDB, 0xBB, 0xC2, 0xDB, 0x04, 0xDE, 0x8E, 0xF9, + 0x2E, 0x8E, 0xFC, 0x14, 0x1F, 0xBE, 0xCA, 0xA6, + 0x28, 0x7C, 0x59, 0x47, 0x4E, 0x6B, 0xC0, 0x5D, + 0x99, 0xB2, 0x96, 0x4F, 0xA0, 0x90, 0xC3, 0xA2, + 0x23, 0x3B, 0xA1, 0x86, 0x51, 0x5B, 0xE7, 0xED, + 0x1F, 0x61, 0x29, 0x70, 0xCE, 0xE2, 0xD7, 0xAF, + 0xB8, 0x1B, 0xDD, 0x76, 0x21, 0x70, 0x48, 0x1C, + 0xD0, 0x06, 0x91, 0x27, 0xD5, 0xB0, 0x5A, 0xA9, + 0x93, 0xB4, 0xEA, 0x98, 0x8D, 0x8F, 0xDD, 0xC1, + 0x86, 0xFF, 0xB7, 0xDC, 0x90, 0xA6, 0xC0, 0x8F, + 0x4D, 0xF4, 0x35, 0xC9, 0x34, 0x02, 0x84, 0x92, + 0x36, 0xC3, 0xFA, 0xB4, 0xD2, 0x7C, 0x70, 0x26, + 0xC1, 0xD4, 0xDC, 0xB2, 0x60, 0x26, 0x46, 0xDE, + 0xC9, 0x75, 0x1E, 0x76, 0x3D, 0xBA, 0x37, 0xBD, + 0xF8, 0xFF, 0x94, 0x06, 0xAD, 0x9E, 0x53, 0x0E, + 0xE5, 0xDB, 0x38, 0x2F, 0x41, 0x30, 0x01, 0xAE, + 0xB0, 0x6A, 0x53, 0xED, 0x90, 0x27, 0xD8, 0x31, + 0x17, 0x97, 0x27, 0xB0, 0x86, 0x5A, 0x89, 0x18, + 0xDA, 0x3E, 0xDB, 0xEB, 0xCF, 0x9B, 0x14, 0xED, + 0x44, 0xCE, 0x6C, 0xBA, 0xCE, 0xD4, 0xBB, 0x1B, + 0xDB, 0x7F, 0x14, 0x47, 0xE6, 0xCC, 0x25, 0x4B, + 0x33, 0x20, 0x51, 0x51, 0x2B, 0xD7, 0xAF, 0x42, + 0x6F, 0xB8, 0xF4, 0x01, 0x37, 0x8C, 0xD2, 0xBF, + 0x59, 0x83, 0xCA, 0x01, 0xC6, 0x4B, 0x92, 0xEC, + 0xF0, 0x32, 0xEA, 0x15, 0xD1, 0x72, 0x1D, 0x03, + 0xF4, 0x82, 0xD7, 0xCE, 0x6E, 0x74, 0xFE, 0xF6, + 0xD5, 0x5E, 0x70, 0x2F, 0x46, 0x98, 0x0C, 0x82, + 0xB5, 0xA8, 0x40, 0x31, 0x90, 0x0B, 0x1C, 0x9E, + 0x59, 0xE7, 0xC9, 0x7F, 0xBE, 0xC7, 0xE8, 0xF3, + 0x23, 0xA9, 0x7A, 0x7E, 0x36, 0xCC, 0x88, 0xBE, + 0x0F, 0x1D, 0x45, 0xB7, 0xFF, 0x58, 0x5A, 0xC5, + 0x4B, 0xD4, 0x07, 0xB2, 0x2B, 0x41, 0x54, 0xAA, + 0xCC, 0x8F, 0x6D, 0x7E, 0xBF, 0x48, 0xE1, 0xD8, + 0x14, 0xCC, 0x5E, 0xD2, 0x0F, 0x80, 0x37, 0xE0, + 0xA7, 0x97, 0x15, 0xEE, 0xF2, 0x9B, 0xE3, 0x28, + 0x06, 0xA1, 0xD5, 0x8B, 0xB7, 0xC5, 0xDA, 0x76, + 0xF5, 0x50, 0xAA, 0x3D, 0x8A, 0x1F, 0xBF, 0xF0, + 
0xEB, 0x19, 0xCC, 0xB1, 0xA3, 0x13, 0xD5, 0x5C, + 0xDA, 0x56, 0xC9, 0xEC, 0x2E, 0xF2, 0x96, 0x32, + 0x38, 0x7F, 0xE8, 0xD7, 0x6E, 0x3C, 0x04, 0x68, + 0x04, 0x3E, 0x8F, 0x66, 0x3F, 0x48, 0x60, 0xEE, + 0x12, 0xBF, 0x2D, 0x5B, 0x0B, 0x74, 0x74, 0xD6, + 0xE6, 0x94, 0xF9, 0x1E, 0x6D, 0xCC, 0x40, 0x24, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const +BYTE rgbIke3526Modp8192[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34, + 0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1, + 0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74, + 0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22, + 0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD, + 0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B, + 0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37, + 0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45, + 0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6, + 0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B, + 0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED, + 0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5, + 0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6, + 0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D, + 0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05, + 0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A, + 0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F, + 0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96, + 0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB, + 0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D, + 0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04, + 0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C, + 0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B, + 0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03, + 0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F, + 0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9, + 0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18, + 0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5, + 0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10, + 0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D, + 0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 
0x33, + 0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64, + 0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A, + 0x8A, 0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D, + 0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7, + 0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7, + 0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D, + 0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B, + 0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64, + 0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64, + 0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C, + 0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C, + 0x77, 0x09, 0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2, + 0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31, + 0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E, + 0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x21, 0x08, 0x01, + 0x1A, 0x72, 0x3C, 0x12, 0xA7, 0x87, 0xE6, 0xD7, + 0x88, 0x71, 0x9A, 0x10, 0xBD, 0xBA, 0x5B, 0x26, + 0x99, 0xC3, 0x27, 0x18, 0x6A, 0xF4, 0xE2, 0x3C, + 0x1A, 0x94, 0x68, 0x34, 0xB6, 0x15, 0x0B, 0xDA, + 0x25, 0x83, 0xE9, 0xCA, 0x2A, 0xD4, 0x4C, 0xE8, + 0xDB, 0xBB, 0xC2, 0xDB, 0x04, 0xDE, 0x8E, 0xF9, + 0x2E, 0x8E, 0xFC, 0x14, 0x1F, 0xBE, 0xCA, 0xA6, + 0x28, 0x7C, 0x59, 0x47, 0x4E, 0x6B, 0xC0, 0x5D, + 0x99, 0xB2, 0x96, 0x4F, 0xA0, 0x90, 0xC3, 0xA2, + 0x23, 0x3B, 0xA1, 0x86, 0x51, 0x5B, 0xE7, 0xED, + 0x1F, 0x61, 0x29, 0x70, 0xCE, 0xE2, 0xD7, 0xAF, + 0xB8, 0x1B, 0xDD, 0x76, 0x21, 0x70, 0x48, 0x1C, + 0xD0, 0x06, 0x91, 0x27, 0xD5, 0xB0, 0x5A, 0xA9, + 0x93, 0xB4, 0xEA, 0x98, 0x8D, 0x8F, 0xDD, 0xC1, + 0x86, 0xFF, 0xB7, 0xDC, 0x90, 0xA6, 0xC0, 0x8F, + 0x4D, 0xF4, 0x35, 0xC9, 0x34, 0x02, 0x84, 0x92, + 0x36, 0xC3, 0xFA, 0xB4, 0xD2, 0x7C, 0x70, 0x26, + 0xC1, 0xD4, 0xDC, 0xB2, 0x60, 0x26, 0x46, 0xDE, + 0xC9, 0x75, 0x1E, 0x76, 0x3D, 0xBA, 0x37, 0xBD, + 0xF8, 0xFF, 0x94, 0x06, 0xAD, 0x9E, 0x53, 0x0E, + 0xE5, 0xDB, 0x38, 0x2F, 0x41, 0x30, 0x01, 0xAE, + 0xB0, 0x6A, 0x53, 0xED, 0x90, 0x27, 0xD8, 0x31, + 0x17, 0x97, 0x27, 0xB0, 0x86, 0x5A, 0x89, 0x18, + 0xDA, 0x3E, 0xDB, 0xEB, 0xCF, 0x9B, 0x14, 0xED, + 0x44, 0xCE, 0x6C, 0xBA, 0xCE, 0xD4, 0xBB, 
0x1B, + 0xDB, 0x7F, 0x14, 0x47, 0xE6, 0xCC, 0x25, 0x4B, + 0x33, 0x20, 0x51, 0x51, 0x2B, 0xD7, 0xAF, 0x42, + 0x6F, 0xB8, 0xF4, 0x01, 0x37, 0x8C, 0xD2, 0xBF, + 0x59, 0x83, 0xCA, 0x01, 0xC6, 0x4B, 0x92, 0xEC, + 0xF0, 0x32, 0xEA, 0x15, 0xD1, 0x72, 0x1D, 0x03, + 0xF4, 0x82, 0xD7, 0xCE, 0x6E, 0x74, 0xFE, 0xF6, + 0xD5, 0x5E, 0x70, 0x2F, 0x46, 0x98, 0x0C, 0x82, + 0xB5, 0xA8, 0x40, 0x31, 0x90, 0x0B, 0x1C, 0x9E, + 0x59, 0xE7, 0xC9, 0x7F, 0xBE, 0xC7, 0xE8, 0xF3, + 0x23, 0xA9, 0x7A, 0x7E, 0x36, 0xCC, 0x88, 0xBE, + 0x0F, 0x1D, 0x45, 0xB7, 0xFF, 0x58, 0x5A, 0xC5, + 0x4B, 0xD4, 0x07, 0xB2, 0x2B, 0x41, 0x54, 0xAA, + 0xCC, 0x8F, 0x6D, 0x7E, 0xBF, 0x48, 0xE1, 0xD8, + 0x14, 0xCC, 0x5E, 0xD2, 0x0F, 0x80, 0x37, 0xE0, + 0xA7, 0x97, 0x15, 0xEE, 0xF2, 0x9B, 0xE3, 0x28, + 0x06, 0xA1, 0xD5, 0x8B, 0xB7, 0xC5, 0xDA, 0x76, + 0xF5, 0x50, 0xAA, 0x3D, 0x8A, 0x1F, 0xBF, 0xF0, + 0xEB, 0x19, 0xCC, 0xB1, 0xA3, 0x13, 0xD5, 0x5C, + 0xDA, 0x56, 0xC9, 0xEC, 0x2E, 0xF2, 0x96, 0x32, + 0x38, 0x7F, 0xE8, 0xD7, 0x6E, 0x3C, 0x04, 0x68, + 0x04, 0x3E, 0x8F, 0x66, 0x3F, 0x48, 0x60, 0xEE, + 0x12, 0xBF, 0x2D, 0x5B, 0x0B, 0x74, 0x74, 0xD6, + 0xE6, 0x94, 0xF9, 0x1E, 0x6D, 0xBE, 0x11, 0x59, + 0x74, 0xA3, 0x92, 0x6F, 0x12, 0xFE, 0xE5, 0xE4, + 0x38, 0x77, 0x7C, 0xB6, 0xA9, 0x32, 0xDF, 0x8C, + 0xD8, 0xBE, 0xC4, 0xD0, 0x73, 0xB9, 0x31, 0xBA, + 0x3B, 0xC8, 0x32, 0xB6, 0x8D, 0x9D, 0xD3, 0x00, + 0x74, 0x1F, 0xA7, 0xBF, 0x8A, 0xFC, 0x47, 0xED, + 0x25, 0x76, 0xF6, 0x93, 0x6B, 0xA4, 0x24, 0x66, + 0x3A, 0xAB, 0x63, 0x9C, 0x5A, 0xE4, 0xF5, 0x68, + 0x34, 0x23, 0xB4, 0x74, 0x2B, 0xF1, 0xC9, 0x78, + 0x23, 0x8F, 0x16, 0xCB, 0xE3, 0x9D, 0x65, 0x2D, + 0xE3, 0xFD, 0xB8, 0xBE, 0xFC, 0x84, 0x8A, 0xD9, + 0x22, 0x22, 0x2E, 0x04, 0xA4, 0x03, 0x7C, 0x07, + 0x13, 0xEB, 0x57, 0xA8, 0x1A, 0x23, 0xF0, 0xC7, + 0x34, 0x73, 0xFC, 0x64, 0x6C, 0xEA, 0x30, 0x6B, + 0x4B, 0xCB, 0xC8, 0x86, 0x2F, 0x83, 0x85, 0xDD, + 0xFA, 0x9D, 0x4B, 0x7F, 0xA2, 0xC0, 0x87, 0xE8, + 0x79, 0x68, 0x33, 0x03, 0xED, 0x5B, 0xDD, 0x3A, + 0x06, 0x2B, 0x3C, 0xF5, 0xB3, 0xA2, 0x78, 
0xA6, + 0x6D, 0x2A, 0x13, 0xF8, 0x3F, 0x44, 0xF8, 0x2D, + 0xDF, 0x31, 0x0E, 0xE0, 0x74, 0xAB, 0x6A, 0x36, + 0x45, 0x97, 0xE8, 0x99, 0xA0, 0x25, 0x5D, 0xC1, + 0x64, 0xF3, 0x1C, 0xC5, 0x08, 0x46, 0x85, 0x1D, + 0xF9, 0xAB, 0x48, 0x19, 0x5D, 0xED, 0x7E, 0xA1, + 0xB1, 0xD5, 0x10, 0xBD, 0x7E, 0xE7, 0x4D, 0x73, + 0xFA, 0xF3, 0x6B, 0xC3, 0x1E, 0xCF, 0xA2, 0x68, + 0x35, 0x90, 0x46, 0xF4, 0xEB, 0x87, 0x9F, 0x92, + 0x40, 0x09, 0x43, 0x8B, 0x48, 0x1C, 0x6C, 0xD7, + 0x88, 0x9A, 0x00, 0x2E, 0xD5, 0xEE, 0x38, 0x2B, + 0xC9, 0x19, 0x0D, 0xA6, 0xFC, 0x02, 0x6E, 0x47, + 0x95, 0x58, 0xE4, 0x47, 0x56, 0x77, 0xE9, 0xAA, + 0x9E, 0x30, 0x50, 0xE2, 0x76, 0x56, 0x94, 0xDF, + 0xC8, 0x1F, 0x56, 0xE8, 0x80, 0xB9, 0x6E, 0x71, + 0x60, 0xC9, 0x80, 0xDD, 0x98, 0xED, 0xD3, 0xDF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +/******************************* +* * +* TLS GROUPS (RFC 7919) * +* * +********************************/ + +static const BYTE rgbTls7919ffdhe2048[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A, + 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, + 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, + 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB, + 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, + 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, + 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A, + 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, + 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, + 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3, + 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, + 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, + 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72, + 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, + 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, + 0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61, + 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, + 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, + 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4, + 0xAE, 0x56, 0xED, 0xE7, 
0x63, 0x72, 0xBB, 0x19, + 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, + 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC, + 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, + 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, + 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83, + 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, + 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, + 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2, + 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, + 0x88, 0x6B, 0x42, 0x38, 0x61, 0x28, 0x5C, 0x97, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const BYTE rgbTls7919ffdhe3072[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A, + 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, + 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, + 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB, + 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, + 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, + 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A, + 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, + 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, + 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3, + 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, + 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, + 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72, + 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, + 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, + 0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61, + 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, + 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, + 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4, + 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19, + 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, + 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC, + 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, + 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, + 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83, + 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, + 0x3B, 0xB5, 0xFC, 
0xBC, 0x2E, 0xC2, 0x20, 0x05, + 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2, + 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, + 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, + 0xDE, 0x35, 0x5B, 0x3B, 0x65, 0x19, 0x03, 0x5B, + 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38, + 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, + 0x7A, 0xD9, 0x1D, 0x26, 0x91, 0xF7, 0xF7, 0xEE, + 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C, + 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, + 0xB4, 0x13, 0x0C, 0x93, 0xBC, 0x43, 0x79, 0x44, + 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3, + 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, + 0x5C, 0xAE, 0x82, 0xAB, 0x9C, 0x9D, 0xF6, 0x9E, + 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D, + 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, + 0x1D, 0xBF, 0x9A, 0x42, 0xD5, 0xC4, 0x48, 0x4E, + 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF, + 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, + 0x25, 0xE4, 0x1D, 0x2B, 0x66, 0xC6, 0x2E, 0x37, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const BYTE rgbTls7919ffdhe4096[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A, + 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, + 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, + 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB, + 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, + 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, + 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A, + 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, + 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, + 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3, + 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, + 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, + 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72, + 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, + 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, + 0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61, + 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, + 0xB9, 0x6A, 
0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, + 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4, + 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19, + 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, + 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC, + 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, + 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, + 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83, + 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, + 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, + 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2, + 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, + 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, + 0xDE, 0x35, 0x5B, 0x3B, 0x65, 0x19, 0x03, 0x5B, + 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38, + 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, + 0x7A, 0xD9, 0x1D, 0x26, 0x91, 0xF7, 0xF7, 0xEE, + 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C, + 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, + 0xB4, 0x13, 0x0C, 0x93, 0xBC, 0x43, 0x79, 0x44, + 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3, + 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, + 0x5C, 0xAE, 0x82, 0xAB, 0x9C, 0x9D, 0xF6, 0x9E, + 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D, + 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, + 0x1D, 0xBF, 0x9A, 0x42, 0xD5, 0xC4, 0x48, 0x4E, + 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF, + 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, + 0x25, 0xE4, 0x1D, 0x2B, 0x66, 0x9E, 0x1E, 0xF1, + 0x6E, 0x6F, 0x52, 0xC3, 0x16, 0x4D, 0xF4, 0xFB, + 0x79, 0x30, 0xE9, 0xE4, 0xE5, 0x88, 0x57, 0xB6, + 0xAC, 0x7D, 0x5F, 0x42, 0xD6, 0x9F, 0x6D, 0x18, + 0x77, 0x63, 0xCF, 0x1D, 0x55, 0x03, 0x40, 0x04, + 0x87, 0xF5, 0x5B, 0xA5, 0x7E, 0x31, 0xCC, 0x7A, + 0x71, 0x35, 0xC8, 0x86, 0xEF, 0xB4, 0x31, 0x8A, + 0xED, 0x6A, 0x1E, 0x01, 0x2D, 0x9E, 0x68, 0x32, + 0xA9, 0x07, 0x60, 0x0A, 0x91, 0x81, 0x30, 0xC4, + 0x6D, 0xC7, 0x78, 0xF9, 0x71, 0xAD, 0x00, 0x38, + 0x09, 0x29, 0x99, 0xA3, 0x33, 0xCB, 0x8B, 0x7A, + 0x1A, 0x1D, 0xB9, 0x3D, 0x71, 0x40, 0x00, 0x3C, + 0x2A, 0x4E, 
0xCE, 0xA9, 0xF9, 0x8D, 0x0A, 0xCC, + 0x0A, 0x82, 0x91, 0xCD, 0xCE, 0xC9, 0x7D, 0xCF, + 0x8E, 0xC9, 0xB5, 0x5A, 0x7F, 0x88, 0xA4, 0x6B, + 0x4D, 0xB5, 0xA8, 0x51, 0xF4, 0x41, 0x82, 0xE1, + 0xC6, 0x8A, 0x00, 0x7E, 0x5E, 0x65, 0x5F, 0x6A, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const BYTE rgbTls7919ffdhe6144[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A, + 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, + 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, + 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB, + 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, + 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, + 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A, + 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, + 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, + 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3, + 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, + 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, + 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72, + 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, + 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, + 0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61, + 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, + 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, + 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4, + 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19, + 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, + 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC, + 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, + 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, + 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83, + 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, + 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, + 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2, + 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, + 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, + 0xDE, 0x35, 0x5B, 0x3B, 0x65, 0x19, 0x03, 0x5B, + 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38, + 0x61, 
0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, + 0x7A, 0xD9, 0x1D, 0x26, 0x91, 0xF7, 0xF7, 0xEE, + 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C, + 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, + 0xB4, 0x13, 0x0C, 0x93, 0xBC, 0x43, 0x79, 0x44, + 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3, + 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, + 0x5C, 0xAE, 0x82, 0xAB, 0x9C, 0x9D, 0xF6, 0x9E, + 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D, + 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, + 0x1D, 0xBF, 0x9A, 0x42, 0xD5, 0xC4, 0x48, 0x4E, + 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF, + 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, + 0x25, 0xE4, 0x1D, 0x2B, 0x66, 0x9E, 0x1E, 0xF1, + 0x6E, 0x6F, 0x52, 0xC3, 0x16, 0x4D, 0xF4, 0xFB, + 0x79, 0x30, 0xE9, 0xE4, 0xE5, 0x88, 0x57, 0xB6, + 0xAC, 0x7D, 0x5F, 0x42, 0xD6, 0x9F, 0x6D, 0x18, + 0x77, 0x63, 0xCF, 0x1D, 0x55, 0x03, 0x40, 0x04, + 0x87, 0xF5, 0x5B, 0xA5, 0x7E, 0x31, 0xCC, 0x7A, + 0x71, 0x35, 0xC8, 0x86, 0xEF, 0xB4, 0x31, 0x8A, + 0xED, 0x6A, 0x1E, 0x01, 0x2D, 0x9E, 0x68, 0x32, + 0xA9, 0x07, 0x60, 0x0A, 0x91, 0x81, 0x30, 0xC4, + 0x6D, 0xC7, 0x78, 0xF9, 0x71, 0xAD, 0x00, 0x38, + 0x09, 0x29, 0x99, 0xA3, 0x33, 0xCB, 0x8B, 0x7A, + 0x1A, 0x1D, 0xB9, 0x3D, 0x71, 0x40, 0x00, 0x3C, + 0x2A, 0x4E, 0xCE, 0xA9, 0xF9, 0x8D, 0x0A, 0xCC, + 0x0A, 0x82, 0x91, 0xCD, 0xCE, 0xC9, 0x7D, 0xCF, + 0x8E, 0xC9, 0xB5, 0x5A, 0x7F, 0x88, 0xA4, 0x6B, + 0x4D, 0xB5, 0xA8, 0x51, 0xF4, 0x41, 0x82, 0xE1, + 0xC6, 0x8A, 0x00, 0x7E, 0x5E, 0x0D, 0xD9, 0x02, + 0x0B, 0xFD, 0x64, 0xB6, 0x45, 0x03, 0x6C, 0x7A, + 0x4E, 0x67, 0x7D, 0x2C, 0x38, 0x53, 0x2A, 0x3A, + 0x23, 0xBA, 0x44, 0x42, 0xCA, 0xF5, 0x3E, 0xA6, + 0x3B, 0xB4, 0x54, 0x32, 0x9B, 0x76, 0x24, 0xC8, + 0x91, 0x7B, 0xDD, 0x64, 0xB1, 0xC0, 0xFD, 0x4C, + 0xB3, 0x8E, 0x8C, 0x33, 0x4C, 0x70, 0x1C, 0x3A, + 0xCD, 0xAD, 0x06, 0x57, 0xFC, 0xCF, 0xEC, 0x71, + 0x9B, 0x1F, 0x5C, 0x3E, 0x4E, 0x46, 0x04, 0x1F, + 0x38, 0x81, 0x47, 0xFB, 0x4C, 0xFD, 0xB4, 0x77, + 0xA5, 0x24, 0x71, 0xF7, 0xA9, 0xA9, 0x69, 0x10, + 0xB8, 
0x55, 0x32, 0x2E, 0xDB, 0x63, 0x40, 0xD8, + 0xA0, 0x0E, 0xF0, 0x92, 0x35, 0x05, 0x11, 0xE3, + 0x0A, 0xBE, 0xC1, 0xFF, 0xF9, 0xE3, 0xA2, 0x6E, + 0x7F, 0xB2, 0x9F, 0x8C, 0x18, 0x30, 0x23, 0xC3, + 0x58, 0x7E, 0x38, 0xDA, 0x00, 0x77, 0xD9, 0xB4, + 0x76, 0x3E, 0x4E, 0x4B, 0x94, 0xB2, 0xBB, 0xC1, + 0x94, 0xC6, 0x65, 0x1E, 0x77, 0xCA, 0xF9, 0x92, + 0xEE, 0xAA, 0xC0, 0x23, 0x2A, 0x28, 0x1B, 0xF6, + 0xB3, 0xA7, 0x39, 0xC1, 0x22, 0x61, 0x16, 0x82, + 0x0A, 0xE8, 0xDB, 0x58, 0x47, 0xA6, 0x7C, 0xBE, + 0xF9, 0xC9, 0x09, 0x1B, 0x46, 0x2D, 0x53, 0x8C, + 0xD7, 0x2B, 0x03, 0x74, 0x6A, 0xE7, 0x7F, 0x5E, + 0x62, 0x29, 0x2C, 0x31, 0x15, 0x62, 0xA8, 0x46, + 0x50, 0x5D, 0xC8, 0x2D, 0xB8, 0x54, 0x33, 0x8A, + 0xE4, 0x9F, 0x52, 0x35, 0xC9, 0x5B, 0x91, 0x17, + 0x8C, 0xCF, 0x2D, 0xD5, 0xCA, 0xCE, 0xF4, 0x03, + 0xEC, 0x9D, 0x18, 0x10, 0xC6, 0x27, 0x2B, 0x04, + 0x5B, 0x3B, 0x71, 0xF9, 0xDC, 0x6B, 0x80, 0xD6, + 0x3F, 0xDD, 0x4A, 0x8E, 0x9A, 0xDB, 0x1E, 0x69, + 0x62, 0xA6, 0x95, 0x26, 0xD4, 0x31, 0x61, 0xC1, + 0xA4, 0x1D, 0x57, 0x0D, 0x79, 0x38, 0xDA, 0xD4, + 0xA4, 0x0E, 0x32, 0x9C, 0xD0, 0xE4, 0x0E, 0x65, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +static const BYTE rgbTls7919ffdhe8192[] = { + //P + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xAD, 0xF8, 0x54, 0x58, 0xA2, 0xBB, 0x4A, 0x9A, + 0xAF, 0xDC, 0x56, 0x20, 0x27, 0x3D, 0x3C, 0xF1, + 0xD8, 0xB9, 0xC5, 0x83, 0xCE, 0x2D, 0x36, 0x95, + 0xA9, 0xE1, 0x36, 0x41, 0x14, 0x64, 0x33, 0xFB, + 0xCC, 0x93, 0x9D, 0xCE, 0x24, 0x9B, 0x3E, 0xF9, + 0x7D, 0x2F, 0xE3, 0x63, 0x63, 0x0C, 0x75, 0xD8, + 0xF6, 0x81, 0xB2, 0x02, 0xAE, 0xC4, 0x61, 0x7A, + 0xD3, 0xDF, 0x1E, 0xD5, 0xD5, 0xFD, 0x65, 0x61, + 0x24, 0x33, 0xF5, 0x1F, 0x5F, 0x06, 0x6E, 0xD0, + 0x85, 0x63, 0x65, 0x55, 0x3D, 0xED, 0x1A, 0xF3, + 0xB5, 0x57, 0x13, 0x5E, 0x7F, 0x57, 0xC9, 0x35, + 0x98, 0x4F, 0x0C, 0x70, 0xE0, 0xE6, 0x8B, 0x77, + 0xE2, 0xA6, 0x89, 0xDA, 0xF3, 0xEF, 0xE8, 0x72, + 0x1D, 0xF1, 0x58, 0xA1, 0x36, 0xAD, 0xE7, 0x35, + 0x30, 0xAC, 0xCA, 0x4F, 0x48, 0x3A, 0x79, 0x7A, + 
0xBC, 0x0A, 0xB1, 0x82, 0xB3, 0x24, 0xFB, 0x61, + 0xD1, 0x08, 0xA9, 0x4B, 0xB2, 0xC8, 0xE3, 0xFB, + 0xB9, 0x6A, 0xDA, 0xB7, 0x60, 0xD7, 0xF4, 0x68, + 0x1D, 0x4F, 0x42, 0xA3, 0xDE, 0x39, 0x4D, 0xF4, + 0xAE, 0x56, 0xED, 0xE7, 0x63, 0x72, 0xBB, 0x19, + 0x0B, 0x07, 0xA7, 0xC8, 0xEE, 0x0A, 0x6D, 0x70, + 0x9E, 0x02, 0xFC, 0xE1, 0xCD, 0xF7, 0xE2, 0xEC, + 0xC0, 0x34, 0x04, 0xCD, 0x28, 0x34, 0x2F, 0x61, + 0x91, 0x72, 0xFE, 0x9C, 0xE9, 0x85, 0x83, 0xFF, + 0x8E, 0x4F, 0x12, 0x32, 0xEE, 0xF2, 0x81, 0x83, + 0xC3, 0xFE, 0x3B, 0x1B, 0x4C, 0x6F, 0xAD, 0x73, + 0x3B, 0xB5, 0xFC, 0xBC, 0x2E, 0xC2, 0x20, 0x05, + 0xC5, 0x8E, 0xF1, 0x83, 0x7D, 0x16, 0x83, 0xB2, + 0xC6, 0xF3, 0x4A, 0x26, 0xC1, 0xB2, 0xEF, 0xFA, + 0x88, 0x6B, 0x42, 0x38, 0x61, 0x1F, 0xCF, 0xDC, + 0xDE, 0x35, 0x5B, 0x3B, 0x65, 0x19, 0x03, 0x5B, + 0xBC, 0x34, 0xF4, 0xDE, 0xF9, 0x9C, 0x02, 0x38, + 0x61, 0xB4, 0x6F, 0xC9, 0xD6, 0xE6, 0xC9, 0x07, + 0x7A, 0xD9, 0x1D, 0x26, 0x91, 0xF7, 0xF7, 0xEE, + 0x59, 0x8C, 0xB0, 0xFA, 0xC1, 0x86, 0xD9, 0x1C, + 0xAE, 0xFE, 0x13, 0x09, 0x85, 0x13, 0x92, 0x70, + 0xB4, 0x13, 0x0C, 0x93, 0xBC, 0x43, 0x79, 0x44, + 0xF4, 0xFD, 0x44, 0x52, 0xE2, 0xD7, 0x4D, 0xD3, + 0x64, 0xF2, 0xE2, 0x1E, 0x71, 0xF5, 0x4B, 0xFF, + 0x5C, 0xAE, 0x82, 0xAB, 0x9C, 0x9D, 0xF6, 0x9E, + 0xE8, 0x6D, 0x2B, 0xC5, 0x22, 0x36, 0x3A, 0x0D, + 0xAB, 0xC5, 0x21, 0x97, 0x9B, 0x0D, 0xEA, 0xDA, + 0x1D, 0xBF, 0x9A, 0x42, 0xD5, 0xC4, 0x48, 0x4E, + 0x0A, 0xBC, 0xD0, 0x6B, 0xFA, 0x53, 0xDD, 0xEF, + 0x3C, 0x1B, 0x20, 0xEE, 0x3F, 0xD5, 0x9D, 0x7C, + 0x25, 0xE4, 0x1D, 0x2B, 0x66, 0x9E, 0x1E, 0xF1, + 0x6E, 0x6F, 0x52, 0xC3, 0x16, 0x4D, 0xF4, 0xFB, + 0x79, 0x30, 0xE9, 0xE4, 0xE5, 0x88, 0x57, 0xB6, + 0xAC, 0x7D, 0x5F, 0x42, 0xD6, 0x9F, 0x6D, 0x18, + 0x77, 0x63, 0xCF, 0x1D, 0x55, 0x03, 0x40, 0x04, + 0x87, 0xF5, 0x5B, 0xA5, 0x7E, 0x31, 0xCC, 0x7A, + 0x71, 0x35, 0xC8, 0x86, 0xEF, 0xB4, 0x31, 0x8A, + 0xED, 0x6A, 0x1E, 0x01, 0x2D, 0x9E, 0x68, 0x32, + 0xA9, 0x07, 0x60, 0x0A, 0x91, 0x81, 0x30, 0xC4, + 0x6D, 0xC7, 0x78, 0xF9, 0x71, 0xAD, 0x00, 0x38, + 
0x09, 0x29, 0x99, 0xA3, 0x33, 0xCB, 0x8B, 0x7A, + 0x1A, 0x1D, 0xB9, 0x3D, 0x71, 0x40, 0x00, 0x3C, + 0x2A, 0x4E, 0xCE, 0xA9, 0xF9, 0x8D, 0x0A, 0xCC, + 0x0A, 0x82, 0x91, 0xCD, 0xCE, 0xC9, 0x7D, 0xCF, + 0x8E, 0xC9, 0xB5, 0x5A, 0x7F, 0x88, 0xA4, 0x6B, + 0x4D, 0xB5, 0xA8, 0x51, 0xF4, 0x41, 0x82, 0xE1, + 0xC6, 0x8A, 0x00, 0x7E, 0x5E, 0x0D, 0xD9, 0x02, + 0x0B, 0xFD, 0x64, 0xB6, 0x45, 0x03, 0x6C, 0x7A, + 0x4E, 0x67, 0x7D, 0x2C, 0x38, 0x53, 0x2A, 0x3A, + 0x23, 0xBA, 0x44, 0x42, 0xCA, 0xF5, 0x3E, 0xA6, + 0x3B, 0xB4, 0x54, 0x32, 0x9B, 0x76, 0x24, 0xC8, + 0x91, 0x7B, 0xDD, 0x64, 0xB1, 0xC0, 0xFD, 0x4C, + 0xB3, 0x8E, 0x8C, 0x33, 0x4C, 0x70, 0x1C, 0x3A, + 0xCD, 0xAD, 0x06, 0x57, 0xFC, 0xCF, 0xEC, 0x71, + 0x9B, 0x1F, 0x5C, 0x3E, 0x4E, 0x46, 0x04, 0x1F, + 0x38, 0x81, 0x47, 0xFB, 0x4C, 0xFD, 0xB4, 0x77, + 0xA5, 0x24, 0x71, 0xF7, 0xA9, 0xA9, 0x69, 0x10, + 0xB8, 0x55, 0x32, 0x2E, 0xDB, 0x63, 0x40, 0xD8, + 0xA0, 0x0E, 0xF0, 0x92, 0x35, 0x05, 0x11, 0xE3, + 0x0A, 0xBE, 0xC1, 0xFF, 0xF9, 0xE3, 0xA2, 0x6E, + 0x7F, 0xB2, 0x9F, 0x8C, 0x18, 0x30, 0x23, 0xC3, + 0x58, 0x7E, 0x38, 0xDA, 0x00, 0x77, 0xD9, 0xB4, + 0x76, 0x3E, 0x4E, 0x4B, 0x94, 0xB2, 0xBB, 0xC1, + 0x94, 0xC6, 0x65, 0x1E, 0x77, 0xCA, 0xF9, 0x92, + 0xEE, 0xAA, 0xC0, 0x23, 0x2A, 0x28, 0x1B, 0xF6, + 0xB3, 0xA7, 0x39, 0xC1, 0x22, 0x61, 0x16, 0x82, + 0x0A, 0xE8, 0xDB, 0x58, 0x47, 0xA6, 0x7C, 0xBE, + 0xF9, 0xC9, 0x09, 0x1B, 0x46, 0x2D, 0x53, 0x8C, + 0xD7, 0x2B, 0x03, 0x74, 0x6A, 0xE7, 0x7F, 0x5E, + 0x62, 0x29, 0x2C, 0x31, 0x15, 0x62, 0xA8, 0x46, + 0x50, 0x5D, 0xC8, 0x2D, 0xB8, 0x54, 0x33, 0x8A, + 0xE4, 0x9F, 0x52, 0x35, 0xC9, 0x5B, 0x91, 0x17, + 0x8C, 0xCF, 0x2D, 0xD5, 0xCA, 0xCE, 0xF4, 0x03, + 0xEC, 0x9D, 0x18, 0x10, 0xC6, 0x27, 0x2B, 0x04, + 0x5B, 0x3B, 0x71, 0xF9, 0xDC, 0x6B, 0x80, 0xD6, + 0x3F, 0xDD, 0x4A, 0x8E, 0x9A, 0xDB, 0x1E, 0x69, + 0x62, 0xA6, 0x95, 0x26, 0xD4, 0x31, 0x61, 0xC1, + 0xA4, 0x1D, 0x57, 0x0D, 0x79, 0x38, 0xDA, 0xD4, + 0xA4, 0x0E, 0x32, 0x9C, 0xCF, 0xF4, 0x6A, 0xAA, + 0x36, 0xAD, 0x00, 0x4C, 0xF6, 0x00, 0xC8, 0x38, + 
0x1E, 0x42, 0x5A, 0x31, 0xD9, 0x51, 0xAE, 0x64, + 0xFD, 0xB2, 0x3F, 0xCE, 0xC9, 0x50, 0x9D, 0x43, + 0x68, 0x7F, 0xEB, 0x69, 0xED, 0xD1, 0xCC, 0x5E, + 0x0B, 0x8C, 0xC3, 0xBD, 0xF6, 0x4B, 0x10, 0xEF, + 0x86, 0xB6, 0x31, 0x42, 0xA3, 0xAB, 0x88, 0x29, + 0x55, 0x5B, 0x2F, 0x74, 0x7C, 0x93, 0x26, 0x65, + 0xCB, 0x2C, 0x0F, 0x1C, 0xC0, 0x1B, 0xD7, 0x02, + 0x29, 0x38, 0x88, 0x39, 0xD2, 0xAF, 0x05, 0xE4, + 0x54, 0x50, 0x4A, 0xC7, 0x8B, 0x75, 0x82, 0x82, + 0x28, 0x46, 0xC0, 0xBA, 0x35, 0xC3, 0x5F, 0x5C, + 0x59, 0x16, 0x0C, 0xC0, 0x46, 0xFD, 0x82, 0x51, + 0x54, 0x1F, 0xC6, 0x8C, 0x9C, 0x86, 0xB0, 0x22, + 0xBB, 0x70, 0x99, 0x87, 0x6A, 0x46, 0x0E, 0x74, + 0x51, 0xA8, 0xA9, 0x31, 0x09, 0x70, 0x3F, 0xEE, + 0x1C, 0x21, 0x7E, 0x6C, 0x38, 0x26, 0xE5, 0x2C, + 0x51, 0xAA, 0x69, 0x1E, 0x0E, 0x42, 0x3C, 0xFC, + 0x99, 0xE9, 0xE3, 0x16, 0x50, 0xC1, 0x21, 0x7B, + 0x62, 0x48, 0x16, 0xCD, 0xAD, 0x9A, 0x95, 0xF9, + 0xD5, 0xB8, 0x01, 0x94, 0x88, 0xD9, 0xC0, 0xA0, + 0xA1, 0xFE, 0x30, 0x75, 0xA5, 0x77, 0xE2, 0x31, + 0x83, 0xF8, 0x1D, 0x4A, 0x3F, 0x2F, 0xA4, 0x57, + 0x1E, 0xFC, 0x8C, 0xE0, 0xBA, 0x8A, 0x4F, 0xE8, + 0xB6, 0x85, 0x5D, 0xFE, 0x72, 0xB0, 0xA6, 0x6E, + 0xDE, 0xD2, 0xFB, 0xAB, 0xFB, 0xE5, 0x8A, 0x30, + 0xFA, 0xFA, 0xBE, 0x1C, 0x5D, 0x71, 0xA8, 0x7E, + 0x2F, 0x74, 0x1E, 0xF8, 0xC1, 0xFE, 0x86, 0xFE, + 0xA6, 0xBB, 0xFD, 0xE5, 0x30, 0x67, 0x7F, 0x0D, + 0x97, 0xD1, 0x1D, 0x49, 0xF7, 0xA8, 0x44, 0x3D, + 0x08, 0x22, 0xE5, 0x06, 0xA9, 0xF4, 0x61, 0x4E, + 0x01, 0x1E, 0x2A, 0x94, 0x83, 0x8F, 0xF8, 0x8C, + 0xD6, 0x8C, 0x8B, 0xB7, 0xC5, 0xC6, 0x42, 0x4C, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; + +#endif // 1 + +// Definitions +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS paramsModp2048 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_IKE_3526, + .pcbPrimeP = rgbIke3526Modp2048, + .nBitsOfP = 2048, + .nMinBitsPriv = 224, // 2s = 2 * 112 + .nDefaultBitsPriv = 256 // rounding nMinBitsPriv up to the nearest 128 +}; + +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS 
paramsModp3072 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_IKE_3526, + .pcbPrimeP = rgbIke3526Modp3072, + .nBitsOfP = 3072, + .nMinBitsPriv = 256, // 2s = 2 * 128 + .nDefaultBitsPriv = 256 // rounding nMinBitsPriv up to the nearest 128 +}; + +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS paramsModp4096 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_IKE_3526, + .pcbPrimeP = rgbIke3526Modp4096, + .nBitsOfP = 4096, + .nMinBitsPriv = 304, // 2s = 2 * 152 + .nDefaultBitsPriv = 384 // rounding nMinBitsPriv up to the nearest 128 +}; + +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS paramsModp6144 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_IKE_3526, + .pcbPrimeP = rgbIke3526Modp6144, + .nBitsOfP = 6144, + .nMinBitsPriv = 352, // 2s = 2 * 176 + .nDefaultBitsPriv = 384 // rounding nMinBitsPriv up to the nearest 128 +}; + +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS paramsModp8192 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_IKE_3526, + .pcbPrimeP = rgbIke3526Modp8192, + .nBitsOfP = 8192, + .nMinBitsPriv = 400, // 2s = 2 * 200 + .nDefaultBitsPriv = 512 // rounding nMinBitsPriv up to the nearest 128 +}; + +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS paramsffdhe2048 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_TLS_7919, + .pcbPrimeP = rgbTls7919ffdhe2048, + .nBitsOfP = 2048, + .nMinBitsPriv = 224, // 2s = 2 * 112 + .nDefaultBitsPriv = 256 // rounding nMinBitsPriv up to the nearest 128 +}; + +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS paramsffdhe3072 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_TLS_7919, + .pcbPrimeP = rgbTls7919ffdhe3072, + .nBitsOfP = 3072, + .nMinBitsPriv = 256, // 2s = 2 * 128 + .nDefaultBitsPriv = 256 // rounding nMinBitsPriv up to the nearest 128 +}; + +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS paramsffdhe4096 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_TLS_7919, + .pcbPrimeP = rgbTls7919ffdhe4096, + .nBitsOfP 
= 4096, + .nMinBitsPriv = 304, // 2s = 2 * 152 + .nDefaultBitsPriv = 384 // rounding nMinBitsPriv up to the nearest 128 +}; + +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS paramsffdhe6144 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_TLS_7919, + .pcbPrimeP = rgbTls7919ffdhe6144, + .nBitsOfP = 6144, + .nMinBitsPriv = 352, // 2s = 2 * 176 + .nDefaultBitsPriv = 384 // rounding nMinBitsPriv up to the nearest 128 +}; + +static const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS paramsffdhe8192 = +{ + .eDhSafePrimeType = SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_TLS_7919, + .pcbPrimeP = rgbTls7919ffdhe8192, + .nBitsOfP = 8192, + .nMinBitsPriv = 400, // 2s = 2 * 200 + .nDefaultBitsPriv = 512 // rounding nMinBitsPriv up to the nearest 128 +}; + +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp2048 = &paramsModp2048; +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp3072 = &paramsModp3072; +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp4096 = &paramsModp4096; +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp6144 = &paramsModp6144; +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp8192 = &paramsModp8192; + +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe2048 = &paramsffdhe2048; +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe3072 = &paramsffdhe3072; +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe4096 = &paramsffdhe4096; +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe6144 = &paramsffdhe6144; +const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe8192 = &paramsffdhe8192; + +// Note, we rely on the ordering of the parameters from largest to smallest within each named set of +// safe-prime groups as we iterate through them assuming this order in SymCryptDlgroupSetValueSafePrime +const 
PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptNamedSafePrimeGroups[SYMCRYPT_DH_SAFEPRIME_GROUP_COUNT] = +{ + &paramsModp8192, + &paramsModp6144, + &paramsModp4096, + &paramsModp3072, + &paramsModp2048, + &paramsffdhe8192, + &paramsffdhe6144, + &paramsffdhe4096, + &paramsffdhe3072, + &paramsffdhe2048, +}; diff --git a/libs/symcrypt/lib/dlgroup.c b/libs/symcrypt/lib/dlgroup.c new file mode 100644 index 00000000000..021c0e0145e --- /dev/null +++ b/libs/symcrypt/lib/dlgroup.c @@ -0,0 +1,2016 @@ +// +// dlgroup.c Dlgroup functions +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// + +#include "precomp.h" + +// Miller-Rabin iterations for prime generation +#define DLGROUP_MR_ITERATIONS (64) + +// Default size for Q according to FIPS 186-3 +static const struct _DSA_NBITSOFQ_CUTOFFS { + UINT32 nBitsOfP; + UINT32 nBitsOfQ; +} g_nBitsOfQ_Cutoffs[] = { + { 1024, 160 }, + { 2048, 256 }, + { UINT32_MAX, 256 }, +}; + +// Const label for the generation of generator G according to FIPS 186-3 +static const BYTE ggen[] = { 'g', 'g', 'e', 'n' }; + +UINT32 +SYMCRYPT_CALL +SymCryptDlgroupCalculateBitsizeOfQ( UINT32 nBitsOfP ) +{ + UINT32 i = 0; + while ( (i<SYMCRYPT_ARRAY_SIZE(g_nBitsOfQ_Cutoffs) - 1) && + (g_nBitsOfQ_Cutoffs[i].nBitsOfP < nBitsOfP) ) + { + i++; + }; + + return g_nBitsOfQ_Cutoffs[i].nBitsOfQ; +} + +PSYMCRYPT_DLGROUP +SYMCRYPT_CALL +SymCryptDlgroupAllocate( UINT32 nBitsOfP, UINT32 nBitsOfQ ) +{ + PVOID p; + SIZE_T cb; + PSYMCRYPT_DLGROUP res = NULL; + + // Invalid parameters + if ( (nBitsOfP < SYMCRYPT_DLGROUP_MIN_BITSIZE_P) || + ((nBitsOfQ > 0) && (nBitsOfQ < SYMCRYPT_DLGROUP_MIN_BITSIZE_Q)) || + (nBitsOfP < nBitsOfQ) ) + { + goto cleanup; + } + + cb = SymCryptSizeofDlgroupFromBitsizes( nBitsOfP, nBitsOfQ ); + + p = SymCryptCallbackAlloc( cb ); + + if ( p==NULL ) + { + goto cleanup; + } + + res = SymCryptDlgroupCreate( p, cb, nBitsOfP, nBitsOfQ ); + +cleanup: + return res; +} + +VOID +SYMCRYPT_CALL +SymCryptDlgroupFree( _Out_ PSYMCRYPT_DLGROUP pgObj ) +{ + 
SYMCRYPT_CHECK_MAGIC( pgObj ); + SymCryptDlgroupWipe( pgObj ); + SymCryptCallbackFree( pgObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofDlgroupFromBitsizes( UINT32 nBitsOfP, UINT32 nBitsOfQ ) +{ + UINT32 cbSeed = 0; + + if (nBitsOfQ == 0) + { + nBitsOfQ = nBitsOfP-1; // Default to the maximum possible size for Q + } + + // Invalid parameters + if ( (nBitsOfP < SYMCRYPT_DLGROUP_MIN_BITSIZE_P) || + (nBitsOfQ < SYMCRYPT_DLGROUP_MIN_BITSIZE_Q) || + (nBitsOfP < nBitsOfQ) ) + { + return 0; + } + + if ( nBitsOfP == nBitsOfQ ) + { + nBitsOfQ--; + } + + // Calculate the (tight) bytesize of the seed + cbSeed = (nBitsOfQ+7)/8; + + return sizeof(SYMCRYPT_DLGROUP) + + SYMCRYPT_SIZEOF_MODULUS_FROM_BITS( nBitsOfP ) + + SYMCRYPT_SIZEOF_MODULUS_FROM_BITS( nBitsOfQ ) + + SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( nBitsOfP ) + + ((cbSeed + SYMCRYPT_ASYM_ALIGN_VALUE - 1)/SYMCRYPT_ASYM_ALIGN_VALUE)*SYMCRYPT_ASYM_ALIGN_VALUE; // Make sure that the entire structure is ASYM_ALIGNED. +} + +PSYMCRYPT_DLGROUP +SYMCRYPT_CALL +SymCryptDlgroupCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nBitsOfP, + UINT32 nBitsOfQ ) +{ + PSYMCRYPT_DLGROUP pDlgroup = NULL; + + UINT32 cbModP; + UINT32 cbModQ; + UINT32 cbModElement; + + SYMCRYPT_ASSERT( cbBuffer >= SymCryptSizeofDlgroupFromBitsizes( nBitsOfP, nBitsOfQ ) ); + UNREFERENCED_PARAMETER( cbBuffer ); // only referenced in ASSERTs... 
+ SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + + // Invalid parameters + if ( (nBitsOfP < SYMCRYPT_DLGROUP_MIN_BITSIZE_P) || + ((nBitsOfQ > 0) && (nBitsOfQ < SYMCRYPT_DLGROUP_MIN_BITSIZE_Q)) || + (nBitsOfP < nBitsOfQ) ) + { + goto cleanup; + } + + if ( nBitsOfP == nBitsOfQ ) + { + nBitsOfQ--; + } + + pDlgroup = (PSYMCRYPT_DLGROUP) pbBuffer; + + SYMCRYPT_ASSERT( cbBuffer > sizeof(SYMCRYPT_DLGROUP) ); + + // DLGROUP parameters + pDlgroup->cbTotalSize = SymCryptSizeofDlgroupFromBitsizes( nBitsOfP, nBitsOfQ ); + pDlgroup->fHasPrimeQ = FALSE; + + pDlgroup->nBitsOfP = nBitsOfP; + pDlgroup->cbPrimeP = (nBitsOfP+7)/8; + pDlgroup->nDigitsOfP = SymCryptDigitsFromBits( nBitsOfP ); + pDlgroup->nMaxBitsOfP = nBitsOfP; + + pDlgroup->nBitsOfQ = nBitsOfQ; // 0 value possible + pDlgroup->cbPrimeQ = (nBitsOfQ+7)/8; // 0 value possible + pDlgroup->nDigitsOfQ = (nBitsOfQ>0)?SymCryptDigitsFromBits( nBitsOfQ ):0; // 0 value possible + pDlgroup->nMaxBitsOfQ = (nBitsOfQ==0)?(nBitsOfP-1):nBitsOfQ; + + pDlgroup->isSafePrimeGroup = FALSE; + pDlgroup->nMinBitsPriv = 0; + pDlgroup->nDefaultBitsPriv = nBitsOfQ; // 0 value possible + + pDlgroup->nBitsOfSeed = nBitsOfQ; // 0 value possible + pDlgroup->cbSeed = (pDlgroup->nBitsOfSeed+7)/8; // 0 value possible + + pDlgroup->eFipsStandard = SYMCRYPT_DLGROUP_FIPS_NONE; // This will be set either on generate or import + pDlgroup->pHashAlgorithm = NULL; // Like-wise + pDlgroup->dwGenCounter = 0; // Like-wise + pDlgroup->bIndexGenG = 1; // Default: 1 + + // Create SymCrypt objects + pbBuffer += sizeof(SYMCRYPT_DLGROUP); + + cbModP = SymCryptSizeofModulusFromDigits( pDlgroup->nDigitsOfP ); + SYMCRYPT_ASSERT( cbBuffer > sizeof(SYMCRYPT_DLGROUP) + cbModP ); + pDlgroup->pmP = SymCryptModulusCreate( pbBuffer, cbModP, pDlgroup->nDigitsOfP ); + pbBuffer += cbModP; + + // + // **** Always defer the creation of the Q modulus until the group generation or + // import of the modulus. 
This way it is always the fastest possible even when the caller + // specified nBitsOfQ = 0. + // + if (nBitsOfQ>0) + { + cbModQ = SymCryptSizeofModulusFromDigits( pDlgroup->nDigitsOfQ ); + } + else + { + cbModQ = cbModP; + } + SYMCRYPT_ASSERT( cbBuffer > sizeof(SYMCRYPT_DLGROUP) + cbModP + cbModQ ); + pDlgroup->pbQ = pbBuffer; // Set the aligned buffer + pDlgroup->pmQ = NULL; + pbBuffer += cbModQ; + + cbModElement = SymCryptSizeofModElementFromModulus( pDlgroup->pmP ); + SYMCRYPT_ASSERT( cbBuffer > sizeof(SYMCRYPT_DLGROUP) + cbModP + cbModQ + cbModElement ); + pDlgroup->peG = SymCryptModElementCreate( pbBuffer, cbModElement, pDlgroup->pmP ); + pbBuffer += cbModElement; + + pDlgroup->pbSeed = pbBuffer; + + // Setting the magic + SYMCRYPT_SET_MAGIC( pDlgroup ); + +cleanup: + return pDlgroup; +} + +VOID +SYMCRYPT_CALL +SymCryptDlgroupWipe( _Out_ PSYMCRYPT_DLGROUP pgDst ) +{ + SymCryptWipe( (PBYTE) pgDst, pgDst->cbTotalSize ); +} + +VOID +SYMCRYPT_CALL +SymCryptDlgroupCopy( + _In_ PCSYMCRYPT_DLGROUP pgSrc, + _Out_ PSYMCRYPT_DLGROUP pgDst ) +{ + // + // in-place copy is somewhat common... 
+ // + if( pgSrc != pgDst ) + { + pgDst->cbTotalSize = pgSrc->cbTotalSize; + pgDst->fHasPrimeQ = pgSrc->fHasPrimeQ; + + pgDst->nBitsOfP = pgSrc->nBitsOfP; + pgDst->cbPrimeP = pgSrc->cbPrimeP; + pgDst->nDigitsOfP = pgSrc->nDigitsOfP; + pgDst->nMaxBitsOfP = pgSrc->nMaxBitsOfP; + + pgDst->nBitsOfQ = pgSrc->nBitsOfQ; + pgDst->cbPrimeQ = pgSrc->cbPrimeQ; + pgDst->nDigitsOfQ = pgSrc->nDigitsOfQ; + pgDst->nMaxBitsOfQ = pgSrc->nMaxBitsOfQ; + + pgDst->isSafePrimeGroup = pgSrc->isSafePrimeGroup; + pgDst->nMinBitsPriv = pgSrc->nMinBitsPriv; + pgDst->nDefaultBitsPriv = pgSrc->nDefaultBitsPriv; + + pgDst->nBitsOfSeed = pgSrc->nBitsOfSeed; + pgDst->cbSeed = pgSrc->cbSeed; + + pgDst->eFipsStandard = pgSrc->eFipsStandard; + pgDst->pHashAlgorithm = pgSrc->pHashAlgorithm; + pgDst->dwGenCounter = pgSrc->dwGenCounter; + pgDst->bIndexGenG = pgSrc->bIndexGenG; + pgDst->pbQ = pgSrc->pbQ; + + memcpy( (PBYTE)pgDst + sizeof(SYMCRYPT_DLGROUP), (PCBYTE)pgSrc + sizeof(SYMCRYPT_DLGROUP), pgSrc->cbTotalSize - sizeof(SYMCRYPT_DLGROUP) ); + } +} + + +// DLGROUP-specific functions + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupGeneratePrimeQ_FIPS( + _In_ PSYMCRYPT_DLGROUP pDlgroup, + _In_ PCSYMCRYPT_TRIALDIVISION_CONTEXT + pTrialDivisionContext, + _Out_ PUINT32 pfPrimeQFound, + _Out_ PSYMCRYPT_INT piQ, + _Out_ PSYMCRYPT_DIVISOR pdDivTwoQ, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PCSYMCRYPT_HASH hashAlgorithm = pDlgroup->pHashAlgorithm; + UINT32 nBitsOfQ = pDlgroup->nBitsOfQ; + UINT32 cbPrimeQ = pDlgroup->cbPrimeQ; + PBYTE pbSeed = pDlgroup->pbSeed; + UINT32 cbSeed = pDlgroup->cbSeed; + + PSYMCRYPT_INT piDivTwoQ = SymCryptIntFromDivisor(pdDivTwoQ); + + SIZE_T cbHash = SymCryptHashResultSize( hashAlgorithm ); + PBYTE pbTrHash = NULL; // Pointer to the truncated hash value + PBYTE pbHashExtra = NULL; // Needed as temp buffer for 186-2 + + UINT32 dwShiftBits = (8-nBitsOfQ%8)%8; // When nBitsOfQ is a multiple of 8 -> 
dwShiftBits = 0; + + UINT32 carry = 0; + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_DIVISOR(SymCryptDigitsFromBits(nBitsOfQ+1)), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_IS_PRIME(pDlgroup->nDigitsOfQ), + 2 * cbHash )) ); + SYMCRYPT_ASSERT( cbHash >= cbPrimeQ ); + + // Hash the seed according to the standard specified + if (pDlgroup->eFipsStandard == SYMCRYPT_DLGROUP_FIPS_186_2) + { + SYMCRYPT_ASSERT( hashAlgorithm == SymCryptSha1Algorithm ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_MAX(2*cbHash, cbSeed) ); + + // Hash buffers + pbTrHash = pbScratch; + pbHashExtra = pbTrHash + cbHash; + + // Prepare an int for SEED + 1 + scError = SymCryptIntSetValue( pbSeed, cbSeed, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piDivTwoQ ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Add 1 + carry = SymCryptIntAddUint32( piDivTwoQ, 1, piDivTwoQ ); + if (carry > 0) + { + // This should never happen as the size of piDivTwoQ is at least one bit bigger than Q + scError = SYMCRYPT_FIPS_FAILURE; + goto cleanup; + } + + // (SEED+1) Mod 2^nBitsOfSeed + SymCryptIntModPow2( piDivTwoQ, nBitsOfQ, piDivTwoQ ); + + // Get the value into pbTrHash (Notice the cbSeed size) + scError = SymCryptIntGetValue( piDivTwoQ, pbTrHash, cbSeed, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Hash it into pbHashExtra + SymCryptHash( hashAlgorithm, pbTrHash, cbPrimeQ, pbHashExtra, cbHash ); + + // Hash the seed + SymCryptHash( hashAlgorithm, pbSeed, cbSeed, pbTrHash, cbHash ); + + // Xor the two + SymCryptXorBytes( pbTrHash, pbHashExtra, pbTrHash, cbHash ); + + } + else if (pDlgroup->eFipsStandard == SYMCRYPT_DLGROUP_FIPS_186_3) + { + SYMCRYPT_ASSERT( cbScratch >= cbHash ); + pbTrHash = pbScratch; + SymCryptHash( hashAlgorithm, pbSeed, cbSeed, pbTrHash, cbHash ); + } + else + { + scError = SYMCRYPT_FIPS_FAILURE; + goto cleanup; + } + + // Convert it to (2^{N-1} + 
(Hash mod 2^{N-1})) | 1 + pbTrHash += (cbHash-cbPrimeQ); // Skip any leading zero bytes + pbTrHash[0] &= ((BYTE)0xff >> (dwShiftBits)); // Cut off top bits in the most significant byte + pbTrHash[0] |= ((BYTE)0x01 << (7 - dwShiftBits)); // Set the (N-1)-th bit + pbTrHash[cbPrimeQ-1] |= ((BYTE)0x01); // Make the entire number odd + + // Set the value + scError = SymCryptIntSetValue( pbTrHash, cbPrimeQ, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piQ ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Assume not a prime + *pfPrimeQFound = 0; + + // Fast compositeness check + if (SymCryptIntFindSmallDivisor( pTrialDivisionContext, piQ, NULL, 0 )) + { + goto cleanup; + } + + // IntMillerRabinPrimalityTest requirement: + // piQ > 3 since nBitsOfQ is bounded by SYMCRYPT_DLGROUP_MIN_BITSIZE_Q + *pfPrimeQFound = SymCryptIntMillerRabinPrimalityTest( + piQ, + nBitsOfQ, + DLGROUP_MR_ITERATIONS, + SYMCRYPT_FLAG_DATA_PUBLIC, // q and p will be public + pbScratch, + cbScratch ); + + // Set pdDivTwoQ + if (*pfPrimeQFound) + { + scError = SymCryptIntCopyMixedSize( piQ, piDivTwoQ ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + SymCryptIntMulPow2( piDivTwoQ, 1, piDivTwoQ ); + + // IntToDivisor requirement: + // Q is non-zero as prime --> 2*Q != 0 + SymCryptIntToDivisor( + piDivTwoQ, + pdDivTwoQ, + 4*pDlgroup->nBitsOfP, // 4*L + SYMCRYPT_FLAG_DATA_PUBLIC, + pbScratch, + cbScratch ); + } + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupGeneratePrimeP_FIPS( + _In_ PSYMCRYPT_DLGROUP pDlgroup, + _In_ PSYMCRYPT_DIVISOR pdDivTwoQ, + _In_ UINT32 dwMaxCounter, // Maximum value of counter (used in validation) + _In_ PCSYMCRYPT_TRIALDIVISION_CONTEXT + pTrialDivisionContext, + _Out_ PUINT32 pfPrimePFound, + _Out_ PSYMCRYPT_INT piP, + _Out_ PUINT32 pdwCounter, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PCSYMCRYPT_HASH hashAlgorithm = 
pDlgroup->pHashAlgorithm; + UINT32 nBitsOfP = pDlgroup->nBitsOfP; + PBYTE pbSeed = pDlgroup->pbSeed; + UINT32 cbSeed = pDlgroup->cbSeed; + UINT32 nBitsOfSeed = pDlgroup->nBitsOfSeed; + + SIZE_T cbHash = SymCryptHashResultSize( hashAlgorithm ); + + UINT32 counter = 0; + + UINT32 ndDivTwoQ = SymCryptDivisorDigitsizeOfObject( pdDivTwoQ ); + UINT32 cbIntTwoQ = SymCryptSizeofIntFromDigits( ndDivTwoQ ); + + PSYMCRYPT_INT piPersistent = NULL; + PSYMCRYPT_INT piRemainder = NULL; + + PBYTE pbHashOutput = NULL; + PBYTE pbTempSeed = NULL; + + PBYTE pbW = NULL; + UINT32 cbW = pDlgroup->cbPrimeP; + + PBYTE pbWCurr = NULL; + SIZE_T cbWBytesLeft = 0; + + UINT32 carry = 0; + + // We will use internal scratch space at the start of pbScratch + // because cbHash, cbSeed and cbW are not necessarily aligned according + // to SYMCRYPT_ASYM_ALIGN_VALUE + PBYTE pbScratchInternal = 0; + SIZE_T cbScratchInternal = 0; + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= 2*cbIntTwoQ + cbHash + cbSeed + cbW + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( pDlgroup->nDigitsOfP, ndDivTwoQ ), + SYMCRYPT_SCRATCH_BYTES_FOR_INT_IS_PRIME( pDlgroup->nDigitsOfP )) ); + + // Create temporaries + pbScratchInternal = pbScratch; + cbScratchInternal = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( pDlgroup->nDigitsOfP, ndDivTwoQ ), + SYMCRYPT_SCRATCH_BYTES_FOR_INT_IS_PRIME( pDlgroup->nDigitsOfP ) ); + pbScratch += cbScratchInternal; + + piPersistent = SymCryptIntCreate( pbScratch, cbIntTwoQ, ndDivTwoQ ); + pbScratch += cbIntTwoQ; + + piRemainder = SymCryptIntCreate( pbScratch, cbIntTwoQ, ndDivTwoQ ); + pbScratch += cbIntTwoQ; + + pbHashOutput = pbScratch; + pbScratch += cbHash; + + pbTempSeed = pbScratch; + pbScratch += cbSeed; + + pbW = pbScratch; + + // Set the value for the expression "domain_parameter_seed + offset + j" + scError = SymCryptIntSetValue( pbSeed, cbSeed, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piPersistent ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto 
cleanup; + } + + // If the standard is 186-2 add 1 since the starting offset is 2 + if (pDlgroup->eFipsStandard == SYMCRYPT_DLGROUP_FIPS_186_2) + { + carry = SymCryptIntAddUint32( piPersistent, 1, piPersistent ); + if (carry!=0) + { + // This should never happen as piPersistent has at least one more bit than + // seedLen == nBitsOfQ + scError = SYMCRYPT_FIPS_FAILURE; + goto cleanup; + } + + // Mod 2^seedlen + SymCryptIntModPow2( piPersistent, nBitsOfSeed, piPersistent ); + } + + *pfPrimePFound = 0; + + for (counter = 0; counter < dwMaxCounter+1; counter++) + { + cbWBytesLeft = cbW; // Bytes left to write + pbWCurr = pbW + cbW - SYMCRYPT_MIN(cbW,cbHash); // Position of the first hash chunk to write (if cbW < cbHash then we write only 1 chunk) + + while (cbWBytesLeft > 0) + { + // Add 1 to piPersistent + // This can never generate a carry as piPersistent has at least one more bit than + // seedLen == nBitsOfQ and in the next step we always do mod 2^seedlen. + carry = SymCryptIntAddUint32( piPersistent, 1, piPersistent ); + if (carry!=0) + { + scError = SYMCRYPT_FIPS_FAILURE; + goto cleanup; + } + + // Mod 2^seedlen + SymCryptIntModPow2( piPersistent, nBitsOfSeed, piPersistent ); + + // Extract piPersistent into a byte array (this will always be equal to domain_parameter_seed + offset + j) + scError = SymCryptIntGetValue( piPersistent, pbTempSeed, cbSeed, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Hash it + SymCryptHash( hashAlgorithm, pbTempSeed, cbSeed, pbHashOutput, cbHash ); + + if (cbWBytesLeft >= cbHash) + { + // Move the entire hash output to the correct location in the pbW buffer + memcpy(pbWCurr, pbHashOutput, cbHash ); + } + else + { + // Move only the last bytes of the hash output + memcpy(pbWCurr, pbHashOutput + cbHash - cbWBytesLeft, cbWBytesLeft ); + } + + // Update the positions on the W buffer + cbWBytesLeft -= SYMCRYPT_MIN(cbHash,cbWBytesLeft); + pbWCurr -= SYMCRYPT_MIN(cbHash,cbWBytesLeft); 
+ } + + // Import the W buffer into P + scError = SymCryptIntSetValue( pbW, cbW, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piP ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Zero-out the top bits of the integer + SymCryptIntModPow2( piP, nBitsOfP, piP ); + + // Set the most significant bit + SymCryptIntSetBits( piP, 1, nBitsOfP-1, 1); + + // At this point piP = X = W + 2^{L-1} + + // Calculate c = X mod 2Q + SymCryptIntDivMod( piP, pdDivTwoQ, NULL, piRemainder, pbScratchInternal, cbScratchInternal ); + + if (SymCryptIntIsEqualUint32(piRemainder, 0)) + { + // Just add one to X + // We can never get a carry here because the remainder X mod 2Q + // is 0. Therefore X is even. + carry = SymCryptIntAddUint32( piP, 1, piP ); + SYMCRYPT_ASSERT( carry==0 ); + } + else + { + // Subtract 1 from c + // We can never get a borrow here because the remainder is not 0. + carry = SymCryptIntSubUint32( piRemainder, 1, piRemainder ); + SYMCRYPT_ASSERT( carry==0 ); + + // X-(c-1) + // We can never get a borrow here because c is smaller + // or equal to X. 
+ carry = SymCryptIntSubMixedSize( piP, piRemainder, piP ); + SYMCRYPT_ASSERT( carry==0 ); + } + + // Check if smaller than 2^{L-1} by checking the L-1 bit + if (SymCryptIntGetBit( piP, nBitsOfP-1 ) == 0) + { + continue; + } + + // Fast compositeness check + if (SymCryptIntFindSmallDivisor( pTrialDivisionContext, piP, NULL, 0 )) + { + continue; + } + + // IntMillerRabinPrimalityTest requirement: + // piP > 3 since nBitsOfP is bounded by SYMCRYPT_DLGROUP_MIN_BITSIZE_P + *pfPrimePFound = SymCryptIntMillerRabinPrimalityTest( + piP, + nBitsOfP, + DLGROUP_MR_ITERATIONS, + SYMCRYPT_FLAG_DATA_PUBLIC, // q and p will be public + pbScratchInternal, + cbScratchInternal ); + + if (*pfPrimePFound) + { + *pdwCounter = counter; + break; + } + } +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupGenerateGenG_FIPS( + _In_ PSYMCRYPT_DLGROUP pDlgroup, + _Out_ PSYMCRYPT_MODELEMENT peG, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PCSYMCRYPT_HASH hashAlgorithm = pDlgroup->pHashAlgorithm; + PCSYMCRYPT_MODULUS pmP = pDlgroup->pmP; + UINT32 nDigitsOfP = pDlgroup->nDigitsOfP; + UINT32 nBitsOfP = pDlgroup->nBitsOfP; + PCSYMCRYPT_MODULUS pmQ = pDlgroup->pmQ; + UINT32 nDigitsOfQ = pDlgroup->nDigitsOfQ; + PBYTE pbSeed = pDlgroup->pbSeed; + UINT32 cbSeed = pDlgroup->cbSeed; + BYTE bIndexGenG = pDlgroup->bIndexGenG; + + SIZE_T cbHash = SymCryptHashResultSize( hashAlgorithm ); + SYMCRYPT_ASSERT( cbHash == hashAlgorithm->resultSize ); + SIZE_T cbState = SymCryptHashStateSize( hashAlgorithm ); + SYMCRYPT_ASSERT( cbState == hashAlgorithm->stateSize ); + + UINT16 count = 0; + BYTE bTmp = 0; + + PSYMCRYPT_INT piExp = NULL; + PSYMCRYPT_INT piRem = NULL; + PSYMCRYPT_MODELEMENT peOne = NULL; + PBYTE pbState = NULL; + PBYTE pbW = NULL; + + UINT32 cbExp = SymCryptSizeofIntFromDigits( nDigitsOfP ); + UINT32 cbRem = SymCryptSizeofIntFromDigits( nDigitsOfQ ); + UINT32 cbModElement = 
SymCryptSizeofModElementFromModulus( pmP ); + + UINT32 borrow = 0; + + // We will use internal scratch space at the start of pbScratch + // because cbHash is not necessarily aligned according + // to SYMCRYPT_ASYM_ALIGN_VALUE + PBYTE pbScratchInternal = 0; + SIZE_T cbScratchInternal = 0; + + UNREFERENCED_PARAMETER( cbScratch ); + UNREFERENCED_PARAMETER( nDigitsOfQ ); + + // Create temporaries + pbScratchInternal = pbScratch; + cbScratchInternal = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( nDigitsOfP ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( nDigitsOfP, nDigitsOfQ ), + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigitsOfP ) )); + SYMCRYPT_ASSERT( cbScratch >= cbScratchInternal + cbExp + cbRem ); + SYMCRYPT_ASSERT( cbScratch >= cbScratchInternal + cbExp + cbModElement + cbHash + cbState ); + pbScratch += cbScratchInternal; + + piExp = SymCryptIntCreate( pbScratch, cbExp, nDigitsOfP ); + pbScratch += cbExp; + + piRem = SymCryptIntCreate( pbScratch, cbRem, nDigitsOfQ ); + + // Calculate the exponent e = (p-1)/q + borrow = SymCryptIntSubUint32( SymCryptIntFromModulus((PSYMCRYPT_MODULUS)pmP), 1, piExp ); + if (borrow!=0) + { + // The only way to get a borrow here is if the imported prime P + // is zero and we generate a G from P and Q. + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptIntDivMod( + piExp, + SymCryptDivisorFromModulus( (PSYMCRYPT_MODULUS)pmQ ), + piExp, + piRem, + pbScratchInternal, + cbScratchInternal ); + + if ( !SymCryptIntIsEqualUint32(piRem, 0) ) + { + // The only way to get a non-zero remainder is if Q does not divide P-1 + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // To reach here we have guaranteed that P and Q are odd, with bitlength >= 32b, and Q divides P-1. + // It follows that piExp >= 2, as it must be even and non-zero. 
+ + peOne = SymCryptModElementCreate( pbScratch, cbModElement, pmP); + pbScratch += cbModElement; + + pbState = pbScratch; + pbScratch += cbState; + + pbW = pbScratch; + + // Initialize the hash state + SymCryptHashInit( hashAlgorithm, pbState ); + + // Set the modelement equal to one + SymCryptModElementSetValueUint32( 1, pmP, peOne, pbScratchInternal, cbScratchInternal ); + + do + { + count += 1; + + if (count == 0) + { + scError = SYMCRYPT_FIPS_FAILURE; + goto cleanup; + } + + // Hash the seed + SymCryptHashAppend( hashAlgorithm, pbState, pbSeed, cbSeed ); + + // Hash the "ggen" string + SymCryptHashAppend( hashAlgorithm, pbState, ggen, sizeof(ggen) ); + + // Hash the index + SymCryptHashAppend( hashAlgorithm, pbState, &bIndexGenG, sizeof(bIndexGenG) ); + + // Hash the count (in MSB) + bTmp = (BYTE)(count >> 8); + SymCryptHashAppend( hashAlgorithm, pbState, &bTmp, sizeof(bTmp) ); + bTmp = (BYTE)count; + SymCryptHashAppend( hashAlgorithm, pbState, &bTmp, sizeof(bTmp) ); + + // Result into W + SymCryptHashResult( hashAlgorithm, pbState, pbW, cbHash ); + + // Set this into G + scError = SymCryptModElementSetValue( + pbW, + cbHash, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + pmP, + peG, + pbScratchInternal, + cbScratchInternal ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // ModExp G in place + SymCryptModExp( + pmP, + peG, + piExp, + nBitsOfP, + SYMCRYPT_FLAG_DATA_PUBLIC, + peG, + pbScratchInternal, + cbScratchInternal ); + + } while (SymCryptModElementIsZero( pmP, peG ) || SymCryptModElementIsEqual( pmP, peG, peOne )); + +cleanup: + return scError; +} + +// Scratch space requirements for the entire FIPS standards generation of P,Q,G +UINT32 +SYMCRYPT_CALL +SymCryptDlgroupScratchSpace_FIPS( UINT32 nBitsOfP, UINT32 nBitsOfQ, PCSYMCRYPT_HASH pHashAlgorithm ) +{ + UINT32 nDigitsOfP = SymCryptDigitsFromBits( nBitsOfP ); + UINT32 nDigitsOfQ = SymCryptDigitsFromBits( nBitsOfQ ); + UINT32 ndDivTwoQ = SymCryptDigitsFromBits(nBitsOfQ + 1); + + UINT32 cbPrimeP 
= (nBitsOfP+7)/8; // Note: The upper bound for nBitsOfP is enforced by SymCryptDigitsFromBits + UINT32 cbDivTwoQ = SymCryptSizeofDivisorFromDigits(ndDivTwoQ); + UINT32 cbIntTwoQ = SymCryptSizeofIntFromDigits( ndDivTwoQ ); + UINT32 cbSeed = (nBitsOfQ+7)/8; // Note: The upper bound for nBitsOfP is enforced by SymCryptDigitsFromBits + + UINT32 cbExp = SymCryptSizeofIntFromDigits( nDigitsOfP ); + UINT32 cbRem = SymCryptSizeofIntFromDigits( nDigitsOfQ ); + UINT32 cbModElement = SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( nBitsOfP ); + + UINT32 cbHash = (UINT32)SymCryptHashResultSize( pHashAlgorithm ); + UINT32 cbState = (UINT32) SymCryptHashStateSize( pHashAlgorithm ); + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // Thus the following calculation does not overflow the result and is bounded by 2^28. + // + return SYMCRYPT_MAX( cbDivTwoQ + SYMCRYPT_MAX( + // Generate Q + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( ndDivTwoQ ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_IS_PRIME( nDigitsOfQ ), + 2 * cbHash)), + // Generate P + 2*cbIntTwoQ + cbHash + cbSeed + cbPrimeP + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( nDigitsOfP, ndDivTwoQ ), + SYMCRYPT_SCRATCH_BYTES_FOR_INT_IS_PRIME( nDigitsOfP )) ), + SYMCRYPT_MAX( + // Convert P and Q to moduli + SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS( nDigitsOfP ), + // Generate GenG + cbExp + SYMCRYPT_MAX(cbRem, cbModElement + cbState + cbHash) + + SYMCRYPT_MAX(SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( nDigitsOfP ), + SYMCRYPT_MAX(SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( nDigitsOfP, nDigitsOfQ ), + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigitsOfP ) )) )); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupGenerate( + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_ SYMCRYPT_DLGROUP_FIPS fipsStandard, + _Inout_ PSYMCRYPT_DLGROUP pDlgroup ) +{ + SYMCRYPT_ERROR scError = 
SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + PBYTE pbScratchInternal = NULL; + SIZE_T cbScratchInternal = 0; + + UINT32 fPrimeQFound = 0; + UINT32 fPrimePFound = 0; + + // A divisor equal to 2*Q will be needed for the generation of P + PSYMCRYPT_DIVISOR pdDivTwoQ = NULL; + UINT32 cbDivTwoQ = 0; + UINT32 ndDivTwoQ = 0; + + UINT32 nBitsOfP = 0; + UINT32 nDigitsOfP = 0; + UINT32 nBitsOfQ = 0; + UINT32 nDigitsOfQ = 0; + + PCSYMCRYPT_TRIALDIVISION_CONTEXT pTrialDivisionContext = NULL; + + if (fipsStandard == SYMCRYPT_DLGROUP_FIPS_NONE) + { + fipsStandard = SYMCRYPT_DLGROUP_FIPS_LATEST; + } + + // Numbered comments refer to the steps in the FIPS standard + // 1. Check that L,N is in the list of acceptable pairs + // => Skipped as SymCrypt supports more sizes + + // 2. Check that seedlen >= N + // => Skipped as we always have seedlen == N (see below) + + + // Make sure that a hash algorithm is passed (if needed) + // and set the FIPS standard + if (fipsStandard == SYMCRYPT_DLGROUP_FIPS_186_2) + { + if (hashAlgorithm != NULL) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pDlgroup->eFipsStandard = fipsStandard; + hashAlgorithm = SymCryptSha1Algorithm; + } + else + { + if (hashAlgorithm == NULL) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pDlgroup->eFipsStandard = fipsStandard; + } + + // If during allocation the caller didn't know the size of Q + // and set it to 0, pick the default bitsize here + // and fix all the zero parameters. 
+ if (pDlgroup->nBitsOfQ == 0) + { + pDlgroup->nBitsOfQ = SymCryptDlgroupCalculateBitsizeOfQ(pDlgroup->nBitsOfP); + + if (pDlgroup->nBitsOfQ > pDlgroup->nMaxBitsOfQ) + { + scError = SYMCRYPT_FIPS_FAILURE; // This hits when nMaxBitsOfQ = (nBitsOfP-1) <= 160 + goto cleanup; + } + + pDlgroup->cbPrimeQ = (pDlgroup->nBitsOfQ + 7)/8; + pDlgroup->nDigitsOfQ = SymCryptDigitsFromBits( pDlgroup->nBitsOfQ ); + pDlgroup->nDefaultBitsPriv = pDlgroup->nBitsOfQ; + pDlgroup->nBitsOfSeed = pDlgroup->nBitsOfQ; + pDlgroup->cbSeed = (pDlgroup->nBitsOfSeed+7)/8; + } + + // Helper variables + nBitsOfP = pDlgroup->nBitsOfP; + nDigitsOfP = pDlgroup->nDigitsOfP; + nBitsOfQ = pDlgroup->nBitsOfQ; + nDigitsOfQ = pDlgroup->nDigitsOfQ; + + // Create the modulus Q + pDlgroup->pmQ = SymCryptModulusCreate( pDlgroup->pbQ, SymCryptSizeofModulusFromDigits( nDigitsOfQ ), nDigitsOfQ ); + + // Conditions on the hash function output size + // The second condition is needed for generation of G in SymCrypt + // since it allows even very small sizes of P. + if ( (8*((UINT32)SymCryptHashResultSize( hashAlgorithm )) < nBitsOfQ) || + (8*((UINT32)SymCryptHashResultSize( hashAlgorithm )) > nBitsOfP) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Set the group's hash algorithm + pDlgroup->pHashAlgorithm = hashAlgorithm; + + // Calculate sizes for the 2*Q divisor + ndDivTwoQ = SymCryptDigitsFromBits(nBitsOfQ + 1); + cbDivTwoQ = SymCryptSizeofDivisorFromDigits(ndDivTwoQ); + + // Scratch space + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // - SymCryptDlgroupScratchSpace_FIPS is bounded by 2^28. + // + // Thus the following calculation does not overflow cbScratch. 
+ // + cbScratch = SymCryptDlgroupScratchSpace_FIPS( nBitsOfP, nBitsOfQ, hashAlgorithm ); + pbScratch = SymCryptCallbackAlloc(cbScratch); + if (pbScratch==NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Create a divisor 2*Q (needed for the generation of P) + pdDivTwoQ = SymCryptDivisorCreate( pbScratch, cbDivTwoQ, ndDivTwoQ ); + pbScratchInternal = pbScratch + cbDivTwoQ; + cbScratchInternal = cbScratch - cbDivTwoQ; + + // Create a trial division context for both P and Q + pTrialDivisionContext = SymCryptCreateTrialDivisionContext( pDlgroup->nDigitsOfP ); + if (pTrialDivisionContext == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + do + { + do + { + // Fill the seed buffer in the DLGroup with seedlen bits + scError = SymCryptCallbackRandom( pDlgroup->pbSeed, pDlgroup->cbSeed ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Zero-out the top bits if needed + if ((pDlgroup->nBitsOfSeed)%8 != 0) + { + pDlgroup->pbSeed[0] &= ((BYTE)0xff >> (8 - (pDlgroup->nBitsOfSeed)%8)); + } + + scError = SymCryptDlgroupGeneratePrimeQ_FIPS( + pDlgroup, + pTrialDivisionContext, + &fPrimeQFound, + SymCryptIntFromModulus(pDlgroup->pmQ), + pdDivTwoQ, + pbScratchInternal, + cbScratchInternal ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + while (fPrimeQFound == 0); + + scError = SymCryptDlgroupGeneratePrimeP_FIPS( + pDlgroup, + pdDivTwoQ, + 4*nBitsOfP - 1, + pTrialDivisionContext, + &fPrimePFound, + SymCryptIntFromModulus(pDlgroup->pmP), + &(pDlgroup->dwGenCounter), + pbScratchInternal, + cbScratchInternal ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + while (fPrimePFound == 0); + + // Specify that we have a Q + pDlgroup->fHasPrimeQ = TRUE; + + // Convert both of P and Q to moduli + // IntToModulus requirement: + // Both P,Q > 0 since they are primes + SymCryptIntToModulus( + SymCryptIntFromModulus( pDlgroup->pmP ), + pDlgroup->pmP, + 1000*nBitsOfP, // 
Average operations + SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbScratch, + cbScratch ); + + SymCryptIntToModulus( + SymCryptIntFromModulus( pDlgroup->pmQ ), + pDlgroup->pmQ, + 1000*nBitsOfP, // Average operations + SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbScratch, + cbScratch ); + + // Generate G + scError = SymCryptDlgroupGenerateGenG_FIPS( pDlgroup, pDlgroup->peG, pbScratch, cbScratch ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + +cleanup: + if (pTrialDivisionContext!=NULL) + { + SymCryptFreeTrialDivisionContext( pTrialDivisionContext ); + } + + if (pbScratch!=NULL) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupSetValueSafePrime( + SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE dhSafePrimeType, + _Inout_ PSYMCRYPT_DLGROUP pDlgroup ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS safePrimeParams = NULL; + + UINT32 i; + UINT32 nBitsOfQ; + + // Given we know nBitsOfP = nBitsOfQ+1 for all safe-prime groups, this specifies a tight bound when selecting a group + UINT32 nMaxBitsOfP = SYMCRYPT_MIN(pDlgroup->nMaxBitsOfP, pDlgroup->nMaxBitsOfQ+1); + UINT32 nMaxDigitsOfP; + + if ( dhSafePrimeType == SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE_NONE ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Iterate through all named safe-prime groups until we find one which fits the requested parameters + // We can definitely do something smarter here, but we have only 10 values to check so do the dumb thing for now + // Relies on the fact the SymCryptNamedSafePrimeGroups is ordered from largest to smallest + for ( i=0; i<SYMCRYPT_DH_SAFEPRIME_GROUP_COUNT; i++ ) + { + if ( SymCryptNamedSafePrimeGroups[i]->eDhSafePrimeType == dhSafePrimeType && + SymCryptNamedSafePrimeGroups[i]->nBitsOfP <= nMaxBitsOfP ) + { + safePrimeParams = 
SymCryptNamedSafePrimeGroups[i]; + break; + } + } + + if (safePrimeParams == NULL) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + nMaxDigitsOfP = SymCryptDigitsFromBits(safePrimeParams->nBitsOfP); + + // Scratch space + // + // From symcrypt_internal.h we have: + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // + // Thus the following calculation does not overflow cbScratch. + // + cbScratch = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS(nMaxDigitsOfP), + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS(nMaxDigitsOfP) ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if (pbScratch==NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Set fields marking the Dlgroup as being a named safe-prime group + pDlgroup->isSafePrimeGroup = TRUE; + pDlgroup->eFipsStandard = SYMCRYPT_DLGROUP_FIPS_NONE; + pDlgroup->nMinBitsPriv = safePrimeParams->nMinBitsPriv; + pDlgroup->nDefaultBitsPriv = safePrimeParams->nDefaultBitsPriv; + + // Ensure that fields which don't apply to named safe-prime groups are cleared + pDlgroup->pHashAlgorithm = NULL; + pDlgroup->dwGenCounter = 0; + + pDlgroup->nBitsOfSeed = 0; + pDlgroup->pbSeed = NULL; + pDlgroup->cbSeed = 0; + + // Set the bitsize and bytesize of P + pDlgroup->nBitsOfP = safePrimeParams->nBitsOfP; + pDlgroup->cbPrimeP = (safePrimeParams->nBitsOfP + 7)/ 8; + pDlgroup->nDigitsOfP = SymCryptDigitsFromBits(safePrimeParams->nBitsOfP); + + // Set the bitsize and bytesize of Q + nBitsOfQ = pDlgroup->nBitsOfP - 1; + pDlgroup->nBitsOfQ = nBitsOfQ; + pDlgroup->cbPrimeQ = (nBitsOfQ + 7)/8; + pDlgroup->nDigitsOfQ = SymCryptDigitsFromBits(nBitsOfQ); + pDlgroup->fHasPrimeQ = TRUE; + + // + // Prime P + // + + // Recreate the modulus P + // (this will set nDigits in the modulus object appropriately, which is necessary for use of SymCryptIntShr1 below) + pDlgroup->pmP = SymCryptModulusCreate( (PBYTE) pDlgroup->pmP, 
SymCryptSizeofModulusFromDigits( pDlgroup->nDigitsOfP ), pDlgroup->nDigitsOfP ); + + scError = SymCryptIntSetValue( safePrimeParams->pcbPrimeP, pDlgroup->cbPrimeP, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, SymCryptIntFromModulus(pDlgroup->pmP) ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // IntToModulus requirement: + // nBitsOfP >= SYMCRYPT_DLGROUP_MIN_BITSIZE_P --> P > 0 + SymCryptIntToModulus( + SymCryptIntFromModulus( pDlgroup->pmP ), + pDlgroup->pmP, + 1000*pDlgroup->nBitsOfP, // Average operations + SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbScratch, + cbScratch ); + + // + // Prime Q + // + + // Create the modulus Q + pDlgroup->pmQ = SymCryptModulusCreate( pDlgroup->pbQ, SymCryptSizeofModulusFromDigits( pDlgroup->nDigitsOfQ ), pDlgroup->nDigitsOfQ ); + + // Q = floor( P / 2 ) + SymCryptIntShr1( 0, SymCryptIntFromModulus(pDlgroup->pmP), SymCryptIntFromModulus(pDlgroup->pmQ) ); + + // IntToModulus requirement: + // nBitsOfQ >= SYMCRYPT_DLGROUP_MIN_BITSIZE_Q --> Q > 0 + SymCryptIntToModulus( + SymCryptIntFromModulus( pDlgroup->pmQ ), + pDlgroup->pmQ, + 1000*nBitsOfQ, // Average operations + SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbScratch, + cbScratch ); + + // + // Generator G + // + + // G to 2 + SymCryptModElementSetValueUint32( 2, pDlgroup->pmP, pDlgroup->peG, pbScratch, cbScratch ); + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + return scError; +} + +BOOLEAN +SYMCRYPT_CALL +SymCryptDlgroupIsSame( + _In_ PCSYMCRYPT_DLGROUP pDlgroup1, + _In_ PCSYMCRYPT_DLGROUP pDlgroup2 ) +{ + BOOLEAN fIsSameGroup = FALSE; + + if ( pDlgroup1 == pDlgroup2 ) + { + fIsSameGroup = TRUE; + goto cleanup; + } + + if ( (pDlgroup1->nBitsOfP != pDlgroup2->nBitsOfP) || + (pDlgroup1->nDigitsOfP != pDlgroup2->nDigitsOfP) || + !SymCryptIntIsEqual ( SymCryptIntFromModulus(pDlgroup1->pmP), SymCryptIntFromModulus(pDlgroup2->pmP) ) || + !SymCryptModElementIsEqual ( 
pDlgroup1->pmP, pDlgroup1->peG, pDlgroup2->peG )) + { + goto cleanup; + } + + fIsSameGroup = TRUE; + +cleanup: + return fIsSameGroup; +} + +VOID +SYMCRYPT_CALL +SymCryptDlgroupGetSizes( + _In_ PCSYMCRYPT_DLGROUP pDlgroup, + _Out_ SIZE_T* pcbPrimeP, + _Out_ SIZE_T* pcbPrimeQ, + _Out_ SIZE_T* pcbGenG, + _Out_ SIZE_T* pcbSeed ) +{ + if (pcbPrimeP!=NULL) + { + *pcbPrimeP = pDlgroup->cbPrimeP; + } + + if (pcbPrimeQ!=NULL) + { + *pcbPrimeQ = pDlgroup->cbPrimeQ; // This returns 0 if the group does not have a prime Q + } + + if (pcbGenG!=NULL) + { + *pcbGenG = pDlgroup->cbPrimeP; + } + + if (pcbSeed!=NULL) + { + *pcbSeed = pDlgroup->cbSeed; // This returns 0 if the group does not have a prime Q + } +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupAutoCompleteNamedSafePrimeGroup( + _Inout_ PSYMCRYPT_DLGROUP pDlgroup, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratchInternal; + SIZE_T cbScratchInternal; + + PSYMCRYPT_INT piTemp = NULL; + UINT32 cbTemp; + UINT32 i; + UINT32 nBitsOfQ; + PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS safePrimeParams = NULL; + + // Check whether bottom 64b of P all 1 - as first cheap check + if ( SymCryptIntGetValueLsbits64( SymCryptIntFromModulus(pDlgroup->pmP) ) != ((UINT64) -1) ) + { + goto cleanup; // Not a named safe-prime group + } + + cbTemp = SymCryptSizeofIntFromDigits( pDlgroup->nDigitsOfP ); + SYMCRYPT_ASSERT( cbScratch >= cbTemp ); + + // Create an integer piTemp + piTemp = SymCryptIntCreate( pbScratch, cbTemp, pDlgroup->nDigitsOfP ); + pbScratchInternal = pbScratch + cbTemp; + cbScratchInternal = cbScratch - cbTemp; + + // Set piTemp to the generator G (this will fail if the number cannot fit in the object) + SymCryptModElementToInt( pDlgroup->pmP, pDlgroup->peG, piTemp, pbScratchInternal, cbScratchInternal ); + + // Generator must be 2 mod P + if ( !SymCryptIntIsEqualUint32( piTemp, 2 ) ) + { + goto cleanup; // Not a named safe-prime group + } + + 
// Iterate through all named safe-prime groups and check whether any of them have matching Prime P + // We can definitely do something smarter here, but we have only 10 values to check so do the dumb thing for now + for ( i=0; i<SYMCRYPT_DH_SAFEPRIME_GROUP_COUNT; i++ ) + { + if ( SymCryptNamedSafePrimeGroups[i]->nBitsOfP == pDlgroup->nBitsOfP ) + { + // Set piTemp to the named safe-prime group's P (this will fail if the number cannot fit in the object) + SymCryptIntSetValue( SymCryptNamedSafePrimeGroups[i]->pcbPrimeP, pDlgroup->cbPrimeP, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piTemp ); + + if ( SymCryptIntIsEqual( piTemp, SymCryptIntFromModulus(pDlgroup->pmP) ) ) + { + safePrimeParams = SymCryptNamedSafePrimeGroups[i]; + break; + } + } + } + + // If we found a match in the previous loop, auto-populate appropriate fields in pDlGroup + if (safePrimeParams != NULL) + { + if ( pDlgroup->eFipsStandard == SYMCRYPT_DLGROUP_FIPS_186_2 || + pDlgroup->eFipsStandard == SYMCRYPT_DLGROUP_FIPS_186_3 ) + { + // Inappropriate use of named safe-prime groups + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Set fields marking the Dlgroup as being a named safe-prime group + pDlgroup->isSafePrimeGroup = TRUE; + pDlgroup->eFipsStandard = SYMCRYPT_DLGROUP_FIPS_NONE; + pDlgroup->nMinBitsPriv = safePrimeParams->nMinBitsPriv; + pDlgroup->nDefaultBitsPriv = safePrimeParams->nDefaultBitsPriv; + + // Ensure that fields which don't apply to named safe-prime groups are cleared + pDlgroup->pHashAlgorithm = NULL; + pDlgroup->dwGenCounter = 0; + + pDlgroup->nBitsOfSeed = 0; + pDlgroup->pbSeed = NULL; + pDlgroup->cbSeed = 0; + + // Set the bitsize and bytesize of Q + nBitsOfQ = pDlgroup->nBitsOfP - 1; + pDlgroup->nBitsOfQ = nBitsOfQ; + pDlgroup->cbPrimeQ = (nBitsOfQ + 7)/8; + pDlgroup->nDigitsOfQ = SymCryptDigitsFromBits(nBitsOfQ); + + // Create the modulus Q + pDlgroup->pmQ = SymCryptModulusCreate( pDlgroup->pbQ, SymCryptSizeofModulusFromDigits( pDlgroup->nDigitsOfQ ), 
pDlgroup->nDigitsOfQ ); + + // piTemp still has the value of P, and Q = floor( P / 2 ) + SymCryptIntShr1( 0, piTemp, piTemp ); + + // Set the prime Q + scError = SymCryptIntCopyMixedSize( piTemp, SymCryptIntFromModulus(pDlgroup->pmQ) ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // IntToModulus requirement: + // nBitsOfQ >= SYMCRYPT_DLGROUP_MIN_BITSIZE_Q --> Q > 0 + SymCryptIntToModulus( + SymCryptIntFromModulus( pDlgroup->pmQ ), + pDlgroup->pmQ, + 1000*nBitsOfQ, // Average operations + SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbScratch, + cbScratch ); + + pDlgroup->fHasPrimeQ = TRUE; + } + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupSetValue( + _In_reads_bytes_( cbPrimeP ) PCBYTE pbPrimeP, + SIZE_T cbPrimeP, + _In_reads_bytes_( cbPrimeQ ) PCBYTE pbPrimeQ, + SIZE_T cbPrimeQ, + _In_reads_bytes_( cbGenG ) PCBYTE pbGenG, + SIZE_T cbGenG, + SYMCRYPT_NUMBER_FORMAT numFormat, + _In_opt_ PCSYMCRYPT_HASH pHashAlgorithm, + _In_reads_bytes_( cbSeed ) PCBYTE pbSeed, + SIZE_T cbSeed, + UINT32 genCounter, + SYMCRYPT_DLGROUP_FIPS fipsStandard, + _Inout_ PSYMCRYPT_DLGROUP pDlgroup ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + SIZE_T cbScratchVerify = 0; + + PSYMCRYPT_INT piTemp = NULL; + + UINT32 nBitsOfP = 0; + UINT32 nBitsOfQ = 0; + + UINT32 nMaxDigitsOfP = SymCryptDigitsFromBits(pDlgroup->nMaxBitsOfP); + UINT32 nMaxDigitsOfQ = SymCryptDigitsFromBits(pDlgroup->nMaxBitsOfQ); + + PCSYMCRYPT_TRIALDIVISION_CONTEXT pTrialDivisionContext = NULL; + + // Make sure that the inputs make sense + if ( (pbPrimeP==NULL) || (cbPrimeP==0) || // Prime P is needed + ((pbGenG==NULL)&&(cbGenG>0)) || + ((pbPrimeQ==NULL)&&(cbPrimeQ>0)) || + ((pbGenG==NULL)&&(pbPrimeQ==NULL)) || // We can't have both Q and G missing + ((pbSeed==NULL)&&(cbSeed>0)) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // FIPS 186-4 verification is needed + if (fipsStandard 
!= SYMCRYPT_DLGROUP_FIPS_NONE) + { + // Make sure we have what we need + if ((pbPrimeQ == NULL)|| + (cbPrimeQ == 0) || + (pbSeed == NULL) || + (cbSeed == 0) || + ((fipsStandard == SYMCRYPT_DLGROUP_FIPS_186_2) && (pHashAlgorithm != NULL)) || + ((fipsStandard != SYMCRYPT_DLGROUP_FIPS_186_2) && (pHashAlgorithm == NULL)) ) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; + goto cleanup; + } + } + + // Set the hashAlgorithm + if ( (fipsStandard == SYMCRYPT_DLGROUP_FIPS_186_2) || + ((pHashAlgorithm==NULL) && (pbGenG == NULL)) ) + { + // This hits either when: + // - The FIPS standard is 186-2 + // - When we don't specify an algorithm or generator G (thus we need a hash algorithm to generate it + // ourselves) + pDlgroup->pHashAlgorithm = SymCryptSha1Algorithm; + } + else + { + pDlgroup->pHashAlgorithm = pHashAlgorithm; + } + + if ( (fipsStandard != SYMCRYPT_DLGROUP_FIPS_NONE) || (pbGenG == NULL)) + { + // The following is the scratch space for generation / verification + // Notice that we take the maximum size possible so it can get relatively big. + // Also, we will need some additional space for the computed parameters: + // computedP, computedQ, and computedG. + cbScratchVerify = SymCryptDlgroupScratchSpace_FIPS( pDlgroup->nMaxBitsOfP, pDlgroup->nMaxBitsOfQ, pDlgroup->pHashAlgorithm ) + + SYMCRYPT_MAX( SymCryptSizeofIntFromDigits(nMaxDigitsOfP), + SYMCRYPT_MAX( SymCryptSizeofIntFromDigits(nMaxDigitsOfQ), + 2*SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS(nMaxDigitsOfP))); + } + + // Scratch space + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // - SymCryptDlgroupScratchSpace_FIPS is bounded by 2^28. + // + // Thus the following calculation does not overflow cbScratch. 
+ // + cbScratch = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS(nMaxDigitsOfP) + + SYMCRYPT_MAX( SymCryptSizeofIntFromDigits(nMaxDigitsOfQ), + SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS(nMaxDigitsOfP) ), + cbScratchVerify ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if (pbScratch==NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // + // Prime P + // + + // Set the prime P (this will fail if the number cannot fit in the object) + scError = SymCryptIntSetValue( pbPrimeP, cbPrimeP, numFormat, SymCryptIntFromModulus(pDlgroup->pmP) ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Check the bitsize of value + nBitsOfP = SymCryptIntBitsizeOfValue(SymCryptIntFromModulus(pDlgroup->pmP)); + if ( nBitsOfP > pDlgroup->nMaxBitsOfP) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (nBitsOfP < SYMCRYPT_DLGROUP_MIN_BITSIZE_P) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + // FIPS 186-4 verification is needed + // Check genCounter is not too big + if (fipsStandard != SYMCRYPT_DLGROUP_FIPS_NONE && + genCounter > 4*nBitsOfP-1 ) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; + goto cleanup; + } + + if( (SymCryptIntGetValueLsbits32( SymCryptIntFromModulus( pDlgroup->pmP ) ) & 1) == 0 ) + { + // P is even, when it should be a prime of at least 32 bits + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Set the bitsize and bytesize of the value + pDlgroup->nBitsOfP = nBitsOfP; + pDlgroup->cbPrimeP = (nBitsOfP + 7)/8; + + // IntToModulus requirement: + // nBitsOfP >= SYMCRYPT_DLGROUP_MIN_BITSIZE_P --> P > 0 + SymCryptIntToModulus( + SymCryptIntFromModulus( pDlgroup->pmP ), + pDlgroup->pmP, + 1000*nBitsOfP, // Average operations + SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbScratch, + cbScratch ); + + // + // Prime Q + // + + // Wiping of previous (optional) parameters related to Q + if (pDlgroup->pmQ != NULL) + { + SymCryptModulusWipe( 
pDlgroup->pmQ ); + } + if (pDlgroup->cbSeed != 0) + { + SymCryptWipe( pDlgroup->pbSeed, pDlgroup->cbSeed); + } + + if (pbPrimeQ != NULL) + { + // Create an integer piTemp + piTemp = SymCryptIntCreate( pbScratch, cbScratch, nMaxDigitsOfQ ); + + // Set the prime Q (this will fail if the number cannot fit in the object) + scError = SymCryptIntSetValue( pbPrimeQ, cbPrimeQ, numFormat, piTemp ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Check the bitsize of value + nBitsOfQ = SymCryptIntBitsizeOfValue(piTemp); + if ( nBitsOfQ > pDlgroup->nMaxBitsOfQ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (nBitsOfQ < SYMCRYPT_DLGROUP_MIN_BITSIZE_Q) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + if( (SymCryptIntGetValueLsbits32( piTemp ) & 1) == 0 ) + { + // Some of our modinv algorithms require odd inputs, and Q should be odd as it + // claims to be a prime. + // (Q can't be 2 as it must be at least 32 bits long.) + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Set the bitsize and bytesize of the value + pDlgroup->nBitsOfQ = nBitsOfQ; + pDlgroup->cbPrimeQ = (nBitsOfQ + 7)/8; + pDlgroup->nDigitsOfQ = SymCryptDigitsFromBits(nBitsOfQ); + pDlgroup->nDefaultBitsPriv = nBitsOfQ; + pDlgroup->nBitsOfSeed = nBitsOfQ; + pDlgroup->cbSeed = (nBitsOfQ+7)/8; + + // Create the modulus Q + pDlgroup->pmQ = SymCryptModulusCreate( pDlgroup->pbQ, SymCryptSizeofModulusFromDigits( pDlgroup->nDigitsOfQ ), pDlgroup->nDigitsOfQ ); + + // Set the prime Q + scError = SymCryptIntCopyMixedSize( piTemp, SymCryptIntFromModulus(pDlgroup->pmQ) ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // piTemp is not needed any more so we are free to re-use the scratch space + + // IntToModulus requirement: + // nBitsOfQ >= SYMCRYPT_DLGROUP_MIN_BITSIZE_Q --> Q > 0 + SymCryptIntToModulus( + SymCryptIntFromModulus( pDlgroup->pmQ ), + pDlgroup->pmQ, + 1000*nBitsOfP, // Average operations + SYMCRYPT_FLAG_DATA_PUBLIC | 
SYMCRYPT_FLAG_MODULUS_PRIME, + pbScratch, + cbScratch ); + + pDlgroup->fHasPrimeQ = TRUE; + } + else + { + // Clear all info about Q + pDlgroup->cbPrimeQ = 0; + pDlgroup->nBitsOfQ = 0; + pDlgroup->nDigitsOfQ = 0; + + pDlgroup->nDefaultBitsPriv = 0; + pDlgroup->nBitsOfSeed = 0; + pDlgroup->cbSeed = 0; + + pDlgroup->pmQ = NULL; + pDlgroup->fHasPrimeQ = FALSE; + } + + pDlgroup->isSafePrimeGroup = FALSE; + pDlgroup->nMinBitsPriv = 0; + + // + // Provided Generator G + // + if (pbGenG != NULL) + { + // Set the generator G (this will fail if the number cannot fit in the object) + scError = SymCryptModElementSetValue( pbGenG, cbGenG, numFormat, pDlgroup->pmP, pDlgroup->peG, pbScratch, cbScratch ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptDlgroupAutoCompleteNamedSafePrimeGroup( pDlgroup, pbScratch, cbScratch ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Successfully detected, validated and autocompleted named safe-prime group + if (pDlgroup->isSafePrimeGroup) + { + goto cleanup; + } + } + + // + // Verification data (this has to be done before possibly generating G) + // + + // Set the FIPS standard + pDlgroup->eFipsStandard = fipsStandard; + + // Set the seed + if (pbSeed != NULL) + { + if (cbSeed != pDlgroup->cbSeed) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + memcpy( pDlgroup->pbSeed, pbSeed, cbSeed ); + } + + // Set the genCounter + pDlgroup->dwGenCounter = genCounter; + + // + // Generator G + // + + if (pbGenG == NULL) + { + // Let's generate G here since none was given + + // // We need Q (check at the beginning) + // if (pbPrimeQ==NULL) + // { + // scError = SYMCRYPT_INVALID_ARGUMENT; + // goto cleanup; + // } + + // If no seed was given let's generate our own + if (pbSeed==NULL) + { + SymCryptCallbackRandom(pDlgroup->pbSeed, pDlgroup->cbSeed); + } + + scError = SymCryptDlgroupGenerateGenG_FIPS( + pDlgroup, + pDlgroup->peG, + pbScratch, + cbScratch ); + if (scError!=SYMCRYPT_NO_ERROR) 
+ { + goto cleanup; + } + } + + + // Verification + if (fipsStandard != SYMCRYPT_DLGROUP_FIPS_NONE) + { + // Verification + PBYTE pbScratchInternal = pbScratch; + SIZE_T cbScratchInternal = cbScratch; + + UINT32 ndDivTwoQ = 0; + UINT32 cbDivTwoQ = 0; + PSYMCRYPT_DIVISOR pdDivTwoQ = NULL; + + UINT32 cbComputed = 0; + PSYMCRYPT_INT piComputed = NULL; + UINT32 fPrimeComputed = 0; + UINT32 dwComputedCounter = 0; + + PSYMCRYPT_MODELEMENT peComputed = NULL; + PSYMCRYPT_MODELEMENT peOne = NULL; + + // Step 3: Acceptable pairs of L,N => skipped + + // Step 6: nBitsOfSeed < nBitsOfQ => skipped + + // Create the divisor object + ndDivTwoQ = SymCryptDigitsFromBits(pDlgroup->nBitsOfQ + 1); + cbDivTwoQ = SymCryptSizeofDivisorFromDigits( ndDivTwoQ ); + pdDivTwoQ = SymCryptDivisorCreate( pbScratchInternal, cbDivTwoQ, ndDivTwoQ ); + pbScratchInternal += cbDivTwoQ; + cbScratchInternal -= cbDivTwoQ; + + // Create the temporary integer of size Q + cbComputed = SymCryptSizeofIntFromDigits( pDlgroup->nDigitsOfQ ); + piComputed = SymCryptIntCreate( pbScratchInternal, cbComputed, pDlgroup->nDigitsOfQ ); + pbScratchInternal += cbComputed; + cbScratchInternal -= cbComputed; + + // Create a trial division context for both P and Q + pTrialDivisionContext = SymCryptCreateTrialDivisionContext( pDlgroup->nDigitsOfP ); + if (pTrialDivisionContext == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Steps 8,9: Check if computed_q is prime and equal to q + scError = SymCryptDlgroupGeneratePrimeQ_FIPS( + pDlgroup, + pTrialDivisionContext, + &fPrimeComputed, + piComputed, + pdDivTwoQ, + pbScratchInternal, + cbScratchInternal ); + if (scError != SYMCRYPT_NO_ERROR) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; // Overwrite any possible error + goto cleanup; + } + + if ((!fPrimeComputed)||(!SymCryptIntIsEqual( piComputed, SymCryptIntFromModulus(pDlgroup->pmQ)))) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; + goto cleanup; + } + + // Create the temporary integer 
of size P + pbScratchInternal -= cbComputed; + cbScratchInternal += cbComputed; + cbComputed = SymCryptSizeofIntFromDigits( pDlgroup->nDigitsOfP ); + piComputed = SymCryptIntCreate( pbScratchInternal, cbComputed, pDlgroup->nDigitsOfP ); + pbScratchInternal += cbComputed; + cbScratchInternal -= cbComputed; + + // Steps 10-14: Check if computed_p is prime and equal to p + scError = SymCryptDlgroupGeneratePrimeP_FIPS( + pDlgroup, + pdDivTwoQ, + pDlgroup->dwGenCounter, // Go up to this + pTrialDivisionContext, + &fPrimeComputed, + piComputed, + &dwComputedCounter, + pbScratchInternal, + cbScratchInternal ); + if (scError != SYMCRYPT_NO_ERROR) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; // Overwrite any possible error + goto cleanup; + } + + if ((!fPrimeComputed)||(dwComputedCounter!=pDlgroup->dwGenCounter)||(!SymCryptIntIsEqual( piComputed, SymCryptIntFromModulus(pDlgroup->pmP)))) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; + goto cleanup; + } + + // Validation of G + + // Create the temporary modelement mod P + pbScratchInternal -= cbComputed; + cbScratchInternal += cbComputed; + cbComputed = SymCryptSizeofModElementFromModulus( pDlgroup->pmP ); + peOne = SymCryptModElementCreate( pbScratchInternal, cbComputed, pDlgroup->pmP ); + pbScratchInternal += cbComputed; + cbScratchInternal -= cbComputed; + peComputed = SymCryptModElementCreate( pbScratchInternal, cbComputed, pDlgroup->pmP ); + pbScratchInternal += cbComputed; + cbScratchInternal -= cbComputed; + + // Step 2: Verify that 2<= G <= p-1 + SymCryptModElementSetValueUint32( 1, pDlgroup->pmP, peOne, pbScratchInternal, cbScratchInternal ); // Set the temporary to 1 + + if ((SymCryptModElementIsZero(pDlgroup->pmP, pDlgroup->peG)) || (SymCryptModElementIsEqual(pDlgroup->pmP, pDlgroup->peG, peOne))) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; + goto cleanup; + } + + // Step 3: Verify that G^Q == 1 + SymCryptModExp( + pDlgroup->pmP, + pDlgroup->peG, + SymCryptIntFromModulus(pDlgroup->pmQ), + nBitsOfQ, + 
SYMCRYPT_FLAG_DATA_PUBLIC, + peComputed, + pbScratchInternal, + cbScratchInternal ); + + if (!SymCryptModElementIsEqual(pDlgroup->pmP, peComputed, peOne)) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; + goto cleanup; + } + + } + +cleanup: + if (pTrialDivisionContext!=NULL) + { + SymCryptFreeTrialDivisionContext( pTrialDivisionContext ); + } + + if (pbScratch!=NULL) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlgroupGetValue( + _In_ PCSYMCRYPT_DLGROUP pDlgroup, + _Out_writes_bytes_( cbPrimeP ) PBYTE pbPrimeP, + SIZE_T cbPrimeP, + _Out_writes_bytes_( cbPrimeQ ) PBYTE pbPrimeQ, + SIZE_T cbPrimeQ, + _Out_writes_bytes_( cbGenG ) PBYTE pbGenG, + SIZE_T cbGenG, + SYMCRYPT_NUMBER_FORMAT numFormat, + _Out_ PCSYMCRYPT_HASH * ppHashAlgorithm, + _Out_writes_bytes_( cbSeed ) PBYTE pbSeed, + SIZE_T cbSeed, + _Out_ PUINT32 pGenCounter ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + if ( ((pbPrimeP==NULL)&&(cbPrimeP>0)) || + ((pbPrimeQ==NULL)&&(cbPrimeQ>0)) || + ((pbGenG==NULL)&&(cbGenG>0)) || + ((pbSeed==NULL)&&(cbSeed>0)) || + ((pbSeed!=NULL)&&(cbSeed!=pDlgroup->cbSeed)) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if ((pbPrimeQ!=NULL) && (!pDlgroup->fHasPrimeQ)) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + if (pbPrimeP!=NULL) + { + scError = SymCryptIntGetValue( + SymCryptIntFromModulus(pDlgroup->pmP), + pbPrimeP, + cbPrimeP, + numFormat ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + + if (pbPrimeQ!=NULL) + { + scError = SymCryptIntGetValue( + SymCryptIntFromModulus(pDlgroup->pmQ), + pbPrimeQ, + cbPrimeQ, + numFormat ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + + if (pbGenG!=NULL) + { + // Scratch space is needed + cbScratch = SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pDlgroup->nDigitsOfP); + pbScratch = 
SymCryptCallbackAlloc( cbScratch ); + if (pbScratch==NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + scError = SymCryptModElementGetValue( + pDlgroup->pmP, + pDlgroup->peG, + pbGenG, + cbGenG, + numFormat, + pbScratch, + cbScratch ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + + if (ppHashAlgorithm!=NULL) + { + if (pDlgroup->eFipsStandard == SYMCRYPT_DLGROUP_FIPS_186_2) + { + *ppHashAlgorithm = NULL; + } + else + { + *ppHashAlgorithm = pDlgroup->pHashAlgorithm; + } + } + + if (pbSeed!=NULL && pDlgroup->pbSeed!=NULL) + { + memcpy( pbSeed, pDlgroup->pbSeed, pDlgroup->cbSeed); + } + + if (pGenCounter!=NULL) + { + *pGenCounter = pDlgroup->dwGenCounter; + } + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + return scError; +} diff --git a/libs/symcrypt/lib/dlkey.c b/libs/symcrypt/lib/dlkey.c new file mode 100644 index 00000000000..7df34d6125d --- /dev/null +++ b/libs/symcrypt/lib/dlkey.c @@ -0,0 +1,921 @@ +// +// dlkey.c Dlkey functions +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// +// + +#include "precomp.h" + +// Heap-allocates a DLKEY for pDlgroup: sizes the buffer with +// SymCryptSizeofDlkeyFromDlgroup, obtains it from SymCryptCallbackAlloc and +// hands it to SymCryptDlkeyCreate. Returns NULL when the allocation fails. +PSYMCRYPT_DLKEY +SYMCRYPT_CALL +SymCryptDlkeyAllocate( _In_ PCSYMCRYPT_DLGROUP pDlgroup ) +{ + PVOID p; + SIZE_T cb; + PSYMCRYPT_DLKEY res = NULL; + + cb = SymCryptSizeofDlkeyFromDlgroup( pDlgroup ); + + p = SymCryptCallbackAlloc( cb ); + + if ( p==NULL ) + { + goto cleanup; + } + + res = SymCryptDlkeyCreate( p, cb, pDlgroup ); + +cleanup: + return res; +} + +// Releases a key obtained from SymCryptDlkeyAllocate: checks the object magic, +// wipes the key material with SymCryptDlkeyWipe, then frees the buffer. +VOID +SYMCRYPT_CALL +SymCryptDlkeyFree( _Out_ PSYMCRYPT_DLKEY pkObj ) +{ + SYMCRYPT_CHECK_MAGIC( pkObj ); + SymCryptDlkeyWipe( pkObj ); + SymCryptCallbackFree( pkObj ); +} + +// Returns the number of bytes a DLKEY needs for pDlgroup: the SYMCRYPT_DLKEY +// header, one mod element for modulus P (public key) and one integer with +// nDigitsOfP digits (private key). +UINT32 +SYMCRYPT_CALL +SymCryptSizeofDlkeyFromDlgroup( _In_ PCSYMCRYPT_DLGROUP pDlgroup ) +{ + // Always allocate memory for large private keys + return sizeof(SYMCRYPT_DLKEY) + SymCryptSizeofModElementFromModulus( pDlgroup->pmP ) + SymCryptSizeofIntFromDigits( pDlgroup->nDigitsOfP ); +} + +// Builds a DLKEY inside the caller-supplied buffer pbBuffer (cbBuffer bytes) and +// binds it to pDlgroup. The buffer must be at least +// SymCryptSizeofDlkeyFromDlgroup( pDlgroup ) bytes; this is only asserted. +PSYMCRYPT_DLKEY +SYMCRYPT_CALL +SymCryptDlkeyCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _In_ PCSYMCRYPT_DLGROUP pDlgroup ) +{ + PSYMCRYPT_DLKEY pkRes = NULL; + UINT32 cbModElement = SymCryptSizeofModElementFromModulus( pDlgroup->pmP ); + + SYMCRYPT_ASSERT( cbBuffer >= SymCryptSizeofDlkeyFromDlgroup( pDlgroup ) ); + SYMCRYPT_ASSERT( cbBuffer >= sizeof(SYMCRYPT_DLKEY) + cbModElement ); + UNREFERENCED_PARAMETER( cbBuffer ); // only referenced in above ASSERTs...
+ SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + + pkRes = (PSYMCRYPT_DLKEY) pbBuffer; + + // DLKEY parameters + pkRes->fAlgorithmInfo = 0; + pkRes->pDlgroup = pDlgroup; + pkRes->fHasPrivateKey = FALSE; + pkRes->fPrivateModQ = FALSE; // This will be properly set during generate or setvalue + pkRes->nBitsPriv = pDlgroup->nDefaultBitsPriv; + + // Create SymCrypt objects + // The public-key mod element is placed directly after the SYMCRYPT_DLKEY header + pbBuffer += sizeof(SYMCRYPT_DLKEY); + + pkRes->pePublicKey = SymCryptModElementCreate( pbBuffer, cbModElement, pDlgroup->pmP ); + // NOTE(review): on this failure path pkRes still points at the buffer (magic + // not yet set), so the function does not return NULL here - confirm intended. + if (pkRes->pePublicKey == NULL) + { + goto cleanup; + } + pbBuffer += cbModElement; + + // + // **** Always defer the creation of the private key until the key generation or + // set value. + // + // In place of the pbPrivate pointer store the pointer to the allocated buffer. + // + pkRes->pbPrivate = pbBuffer; + pkRes->piPrivateKey = NULL; + + // Setting the magic + SYMCRYPT_SET_MAGIC( pkRes ); + +cleanup: + return pkRes; +} + +// Zeroes the entire key object, header included; the byte count is +// SymCryptSizeofDlkeyFromDlgroup of the group the key is bound to. +VOID +SYMCRYPT_CALL +SymCryptDlkeyWipe( _Out_ PSYMCRYPT_DLKEY pkDst ) +{ + SymCryptWipe( (PBYTE) pkDst, SymCryptSizeofDlkeyFromDlgroup(pkDst->pDlgroup) ); +} + +// Copies the algorithm flags, private-key flags, private-key bit length, public +// key and private key from pkSrc into pkDst. Does nothing when both arguments +// are the same object. The group pointer of pkDst is not modified. +VOID +SYMCRYPT_CALL +SymCryptDlkeyCopy( + _In_ PCSYMCRYPT_DLKEY pkSrc, + _Out_ PSYMCRYPT_DLKEY pkDst ) +{ + PCSYMCRYPT_DLGROUP pDlgroup = pkSrc->pDlgroup; + + // + // in-place copy is somewhat common...
+ // + if( pkSrc != pkDst ) + { + pkDst->fAlgorithmInfo = pkSrc->fAlgorithmInfo; + pkDst->fHasPrivateKey = pkSrc->fHasPrivateKey; + pkDst->fPrivateModQ = pkSrc->fPrivateModQ; + pkDst->nBitsPriv = pkSrc->nBitsPriv; + + // Copy the public key + SymCryptModElementCopy( pDlgroup->pmP, pkSrc->pePublicKey, pkDst->pePublicKey ); + + // Copy the private key + // NOTE(review): piPrivateKey is passed unconditionally, but SymCryptDlkeyCreate + // leaves it NULL until generate / set value - confirm callers only copy + // between keys whose private integer already exists. + SymCryptIntCopy( pkSrc->piPrivateKey, pkDst->piPrivateKey ); + } +} + + +// DLKEY specific functions + +// Sets the private-key bit length used for this key. Returns +// SYMCRYPT_INVALID_ARGUMENT when nBitsPriv exceeds the group's nBitsOfQ, is +// below the group's nMinBitsPriv, or when flags is non-zero. +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeySetPrivateKeyLength( _Inout_ PSYMCRYPT_DLKEY pkDlkey, UINT32 nBitsPriv, UINT32 flags ) +{ + if( nBitsPriv > pkDlkey->pDlgroup->nBitsOfQ || + nBitsPriv < pkDlkey->pDlgroup->nMinBitsPriv || + flags != 0 ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + pkDlkey->nBitsPriv = nBitsPriv; + return SYMCRYPT_NO_ERROR; +} + +// Returns the group this key is bound to. +PCSYMCRYPT_DLGROUP +SYMCRYPT_CALL +SymCryptDlkeyGetGroup( _In_ PCSYMCRYPT_DLKEY pkDlkey ) +{ + return pkDlkey->pDlgroup; +} + +// Returns the byte size of the public key, which is the byte size of prime P. +UINT32 +SYMCRYPT_CALL +SymCryptDlkeySizeofPublicKey( _In_ PCSYMCRYPT_DLKEY pkDlkey ) +{ + return pkDlkey->pDlgroup->cbPrimeP; +} + +// Returns the byte size of the private key: +// - key not reduced mod Q, or group without Q: byte size of P +// - key mod Q with a custom bit length: that bit length rounded up to bytes +// - key mod Q with the default length (nBitsOfQ): byte size of Q +UINT32 +SYMCRYPT_CALL +SymCryptDlkeySizeofPrivateKey( _In_ PCSYMCRYPT_DLKEY pkDlkey ) +{ + PCSYMCRYPT_DLGROUP pDlgroup = pkDlkey->pDlgroup; + + if (pkDlkey->fPrivateModQ) + { + if (pDlgroup->fHasPrimeQ) + { + if (pkDlkey->nBitsPriv != pDlgroup->nBitsOfQ) + { + return (pkDlkey->nBitsPriv + 7) / 8; + } + else + { + return pDlgroup->cbPrimeQ; + } + } + else + { + return pDlgroup->cbPrimeP; // Somehow the group has no prime Q but the key was set with prime Q, return the safe option + } + } + else + { + return pDlgroup->cbPrimeP; + } +} + +// Returns the key's fHasPrivateKey flag. +BOOLEAN +SYMCRYPT_CALL +SymCryptDlkeyHasPrivateKey( _In_ PCSYMCRYPT_DLKEY pkDlkey ) +{ + return pkDlkey->fHasPrivateKey; +} + +// Flag for SymCryptDlkeyPerformPublicKeyValidation: additionally check that +// (Public key)^Q == 1 mod P. +#define SYMCRYPT_FLAG_DLKEY_PUBLIC_KEY_ORDER_VALIDATION (0x1) + +// Validates the public key of pkDlkey. Returns SYMCRYPT_INVALID_ARGUMENT when +// the public key equals 0, P-1 or 1. With the order-validation flag set it also +// requires the group to have a prime Q and (Public key)^Q == 1 mod P. +// pbScratch must hold two mod elements plus the ModExp scratch (asserted only). +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeyPerformPublicKeyValidation( + _In_ PCSYMCRYPT_DLKEY pkDlkey, + _In_ UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T
cbScratch ) +{ + PCSYMCRYPT_DLGROUP pDlgroup = pkDlkey->pDlgroup; + + PSYMCRYPT_MODELEMENT peTmp = NULL; + PSYMCRYPT_MODELEMENT peTmpPublicKeyExpQ = NULL; + UINT32 cbModElement = SymCryptSizeofModElementFromModulus( pDlgroup->pmP ); + + SYMCRYPT_ASSERT( cbScratch >= (2 * cbModElement) + + SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP(pDlgroup->nDigitsOfP) ); + + // Check if Public key is 0 + if ( SymCryptModElementIsZero( pDlgroup->pmP, pkDlkey->pePublicKey ) ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + peTmp = SymCryptModElementCreate( pbScratch, cbModElement, pDlgroup->pmP); + pbScratch += cbModElement; + cbScratch -= cbModElement; + + // Check if Public key is P-1 + SymCryptModElementSetValueNegUint32( 1, pDlgroup->pmP, peTmp, pbScratch, cbScratch ); + if ( SymCryptModElementIsEqual( pDlgroup->pmP, pkDlkey->pePublicKey, peTmp ) ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + // Check if Public key is 1 (do this check second as we may reuse 1 element in next check) + SymCryptModElementSetValueUint32( 1, pDlgroup->pmP, peTmp, pbScratch, cbScratch ); + if ( SymCryptModElementIsEqual( pDlgroup->pmP, pkDlkey->pePublicKey, peTmp ) ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + // Perform validation that Public key is in a subgroup of order Q. 
// Maximum number of random draws attempted by SymCryptDlkeyGenerate when the
// private key is taken directly from random bytes (rejection loop below).
#define DLKEY_GEN_RANDOM_GENERIC_LIMIT (1000)

//
// SymCryptDlkeyGenerate
//
// Generates a new private/public key pair into pkDlkey, using the group the
// key object refers to (pkDlkey->pDlgroup).
//
// Parameters:
//  flags   - Must contain at least one of SYMCRYPT_FLAG_DLKEY_DSA and
//            SYMCRYPT_FLAG_DLKEY_DH. May also contain
//            SYMCRYPT_FLAG_DLKEY_GEN_MODP and SYMCRYPT_FLAG_KEY_NO_FIPS.
//            Any other bit is rejected.
//  pkDlkey - Key object that receives the key pair. Its nBitsPriv field is
//            read to decide the private key range for safe-prime groups.
//
// Private key range:
//  - GEN_MODP not set and the group has a prime Q: the key is modulo Q
//    (fPrivateModQ is set). For a safe-prime group whose key has
//    nBitsPriv != nBitsOfQ the key is drawn from [1, 2^nBitsPriv - 1],
//    otherwise from [1, Q-1].
//  - Otherwise the key is drawn modulo P from [1, P-2]; this path is only
//    allowed together with SYMCRYPT_FLAG_KEY_NO_FIPS.
//
// When SYMCRYPT_FLAG_KEY_NO_FIPS is not set the function additionally:
//  - rejects DSA on a safe-prime group and DH on a non-safe-prime group,
//  - validates the generated public key (including the order-Q check),
//  - runs the DSA and/or DH self-test and the pairwise consistency test.
//
// Return value:
//  SYMCRYPT_NO_ERROR on success,
//  SYMCRYPT_INVALID_ARGUMENT for a bad flag combination,
//  SYMCRYPT_MEMORY_ALLOCATION_FAILURE when the scratch buffer cannot be
//  allocated, or the error returned by SymCryptIntSetValue /
//  SymCryptDlkeyPerformPublicKeyValidation.
//  fHasPrivateKey and fAlgorithmInfo are only updated after the public key
//  has been computed and (in FIPS mode) validated.
//
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptDlkeyGenerate(
    _In_    UINT32                  flags,
    _Inout_ PSYMCRYPT_DLKEY         pkDlkey )
{
    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
    PBYTE pbScratch = NULL;
    SIZE_T cbScratch = 0;
    PBYTE pbScratchInternal = NULL;
    SIZE_T cbScratchInternal = 0;

    PCSYMCRYPT_DLGROUP pDlgroup = pkDlkey->pDlgroup;

    PSYMCRYPT_MODELEMENT pePrivateKey = NULL;
    UINT32 cbPrivateKey = 0;

    // Modulus (Q or P) the private key is generated against, with its sizes
    PSYMCRYPT_MODULUS pmPriv = NULL;
    UINT32 nDigitsPriv = 0;
    UINT32 nBitsPriv = 0;
    UINT32 fFlagsForModSetRandom = 0;

    // FALSE selects the raw-random-bytes path used for shortened private keys
    BOOLEAN useModSetRandom = TRUE;
    UINT32 nBytesPriv = 0;
    UINT32 dwShiftBits;
    BYTE privMask;
    UINT32 cntr;

    PSYMCRYPT_MODELEMENT peTmp = NULL;
    UINT32 cbModElement = SymCryptSizeofModElementFromModulus( pDlgroup->pmP );

    // Ensure caller has specified what algorithm(s) the key will be used with
    UINT32 algorithmFlags = SYMCRYPT_FLAG_DLKEY_DSA | SYMCRYPT_FLAG_DLKEY_DH;
    // Make sure only allowed flags are specified
    UINT32 allowedFlags = SYMCRYPT_FLAG_DLKEY_GEN_MODP | SYMCRYPT_FLAG_KEY_NO_FIPS | algorithmFlags;

    if ( ( ( flags & ~allowedFlags ) != 0 ) ||
         ( ( flags & algorithmFlags ) == 0 ) )
    {
        scError = SYMCRYPT_INVALID_ARGUMENT;
        goto cleanup;
    }

    // Extra sanity checks when running with FIPS
    // Either Dlgroup is named SafePrime group and key is for DH,
    // or Dlgroup is not named SafePrime group and key is for DSA
    if ( ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) &&
         ( (pDlgroup->isSafePrimeGroup && (flags & SYMCRYPT_FLAG_DLKEY_DSA)) ||
           (!(pDlgroup->isSafePrimeGroup) && (flags & SYMCRYPT_FLAG_DLKEY_DH)) ) )
    {
        scError = SYMCRYPT_INVALID_ARGUMENT;
        goto cleanup;
    }

    // The private key is modulo Q unless the caller asked for mod P or the group has no Q
    pkDlkey->fPrivateModQ = (((flags & SYMCRYPT_FLAG_DLKEY_GEN_MODP)==0) && (pDlgroup->fHasPrimeQ));

    if (pkDlkey->fPrivateModQ)
    {
        pmPriv = pDlgroup->pmQ;
        nDigitsPriv = pDlgroup->nDigitsOfQ;
        nBitsPriv = pDlgroup->nBitsOfQ;
        fFlagsForModSetRandom = SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE | SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE; // 1 to Q-1

        // Safe-prime group with a private key length other than the bit length of Q:
        // draw the key from raw random bytes instead of SymCryptModSetRandom
        if ( pDlgroup->isSafePrimeGroup && (pkDlkey->nBitsPriv != pDlgroup->nBitsOfQ) )
        {
            useModSetRandom = FALSE;
            SYMCRYPT_ASSERT( pkDlkey->nBitsPriv < pDlgroup->nBitsOfQ ); // 2^nBitsPriv < Q

            nBitsPriv = pkDlkey->nBitsPriv; // 1 to (2^nBitsPriv)-1
            nBytesPriv = (pkDlkey->nBitsPriv + 7) / 8;
        }
    }
    else
    {
        // We perform Private key range validation by construction
        // The Private key is constructed in the range [1,min(2^nBitsPriv,Q)-1] precisely when pkDlkey->fPrivateModQ
        // so a mod P private key is refused unless the caller opted out of FIPS
        if ( (flags & SYMCRYPT_FLAG_KEY_NO_FIPS) == 0 )
        {
            scError = SYMCRYPT_INVALID_ARGUMENT;
            goto cleanup;
        }

        pmPriv = pDlgroup->pmP;
        nDigitsPriv = pDlgroup->nDigitsOfP;
        nBitsPriv = pDlgroup->nBitsOfP;
        fFlagsForModSetRandom = SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE; // 1 to P-2
    }

    cbPrivateKey = SymCryptSizeofModElementFromModulus( pmPriv );

    //
    // From symcrypt_internal.h we have:
    //      - sizeof results are upper bounded by 2^19
    //      - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE)
    // Thus the following calculation does not overflow cbScratch.
    //
    // The buffer is sized for the larger of the two uses below: private key
    // generation (one mod element plus common mod operation scratch) and the
    // ModExp / public key validation step (two mod P elements plus ModExp scratch).
    //
    cbScratch = SYMCRYPT_MAX( cbPrivateKey + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS(nDigitsPriv),
                              (2 * cbModElement) + SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP(pDlgroup->nDigitsOfP));
    pbScratch = SymCryptCallbackAlloc( cbScratch );
    if (pbScratch == NULL)
    {
        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
        goto cleanup;
    }

    // Create the private key integer
    // (it is placed in the buffer remembered in pkDlkey->pbPrivate)
    pkDlkey->piPrivateKey = SymCryptIntCreate( pkDlkey->pbPrivate, SymCryptSizeofIntFromDigits(nDigitsPriv), nDigitsPriv );

    if (useModSetRandom)
    {
        // Create the private key modelement
        // at the start of the scratch buffer; the remainder is working scratch
        pePrivateKey = SymCryptModElementCreate( pbScratch, cbPrivateKey, pmPriv );
        pbScratchInternal = pbScratch + cbPrivateKey;
        cbScratchInternal = cbScratch - cbPrivateKey;

        // Set a modelement from 1 to q-1 (or 1 to p-2)
        SymCryptModSetRandom(
            pmPriv,
            pePrivateKey,
            fFlagsForModSetRandom,
            pbScratchInternal,
            cbScratchInternal );

        // Set the private key
        SymCryptModElementToInt(
            pmPriv,
            pePrivateKey,
            pkDlkey->piPrivateKey,
            pbScratchInternal,
            cbScratchInternal );
    }
    else
    {
        // Set private key from 1 to (2^nBitsPriv)-1
        // Wipe any bytes we won't fill with random
        // (presumably so that the nDigitsPriv-digit zero test in the loop reads defined data)
        SymCryptWipe( pbScratch + nBytesPriv, (nDigitsPriv * SYMCRYPT_FDEF_DIGIT_SIZE) - nBytesPriv );

        // (0 - nBitsPriv) & 7 is the number of unused high bits in the top byte;
        // privMask keeps only the low bits of that byte that belong to the key
        dwShiftBits = (0u-nBitsPriv) & 7;
        privMask = (BYTE)(0xff >> dwShiftBits);

        for(cntr=0; cntr<DLKEY_GEN_RANDOM_GENERIC_LIMIT; cntr++)
        {
            // Try random values until we get one we like
            SymCryptCallbackRandom( pbScratch, nBytesPriv );

            // The buffer is interpreted LSB first, so the last random byte is the most significant one
            pbScratch[nBytesPriv-1] &= privMask;

            // If non-zero we have a value in range [1, (2^nBitsPriv)-1]
            if( !SymCryptFdefRawIsEqualUint32( (PCUINT32)pbScratch, nDigitsPriv, 0 ) )
            {
                break;
            }
        }

        // Every draw was zero: treat as a fatal error
        if (cntr >= DLKEY_GEN_RANDOM_GENERIC_LIMIT)
        {
            SymCryptFatal( 'rndl' );
        }

        scError = SymCryptIntSetValue( pbScratch, nBytesPriv, SYMCRYPT_NUMBER_FORMAT_LSB_FIRST, pkDlkey->piPrivateKey );
        if ( scError != SYMCRYPT_NO_ERROR )
        {
            goto cleanup;
        }
    }

    // Calculate the public key
    // (public key = G^privateKey mod P)
    SymCryptModExp(
        pDlgroup->pmP,
        pDlgroup->peG,
        pkDlkey->piPrivateKey,
        nBitsPriv,
        0, // Side-channel safe
        pkDlkey->pePublicKey,
        pbScratch, // We can overwrite pePrivateKey now
        cbScratch );

    // Perform range validation on generated Public key.
    if ( (flags & SYMCRYPT_FLAG_KEY_NO_FIPS) == 0 )
    {
        // Perform Public key validation.
        // Always perform range validation, and validation that Public key is in subgroup of order Q
        scError = SymCryptDlkeyPerformPublicKeyValidation(
            pkDlkey,
            SYMCRYPT_FLAG_DLKEY_PUBLIC_KEY_ORDER_VALIDATION,
            pbScratch,
            cbScratch );
        if ( scError != SYMCRYPT_NO_ERROR )
        {
            goto cleanup;
        }
    }

    // Set the fHasPrivateKey flag
    pkDlkey->fHasPrivateKey = TRUE;

    pkDlkey->fAlgorithmInfo = flags; // We want to track all of the flags in the Dlkey

    // FIPS mode only: algorithm self-tests and pairwise consistency tests
    if ( (flags & SYMCRYPT_FLAG_KEY_NO_FIPS) == 0 )
    {
        if( ( flags & SYMCRYPT_FLAG_DLKEY_DSA ) != 0 )
        {
            // Ensure DSA algorithm selftest is run before first use of DSA algorithm
            SYMCRYPT_RUN_SELFTEST_ONCE(
                SymCryptDsaSelftest,
                SYMCRYPT_SELFTEST_ALGORITHM_DSA );

            // Run PCT eagerly as the key can only be used for DSA - there is no value in deferring
            SYMCRYPT_RUN_KEY_GEN_PCT(
                SymCryptDsaPct,
                pkDlkey,
                SYMCRYPT_PCT_DSA );
        }

        if( ( flags & SYMCRYPT_FLAG_DLKEY_DH ) != 0 )
        {
            // Ensure we have run the algorithm selftest at least once.
            SYMCRYPT_RUN_SELFTEST_ONCE(
                SymCryptDhSecretAgreementSelftest,
                SYMCRYPT_SELFTEST_ALGORITHM_DH );

            // Run PCT eagerly as the key can only be used for DH

            // DH PCT per SP80056a-rev3 5.6.2.1.4 b)
            // Recompute the public key from the private key
            // Option a) appears to be explicitly overruled by 140-3 IG

            // Calculate the public key from the private key in scratch
            pbScratchInternal = pbScratch;
            cbScratchInternal = cbScratch;

            peTmp = SymCryptModElementCreate( pbScratchInternal, cbModElement, pDlgroup->pmP );
            pbScratchInternal += cbModElement;
            cbScratchInternal -= cbModElement;

            SymCryptModExp(
                pDlgroup->pmP,
                pDlgroup->peG,
                pkDlkey->piPrivateKey,
                nBitsPriv, // This is either bits of P, Q, or some caller-defined value i.e. public values
                0, // Side-channel safe
                peTmp,
                pbScratchInternal,
                cbScratchInternal );

            // The recomputed value must match the stored public key
            SYMCRYPT_FIPS_ASSERT( SymCryptModElementIsEqual(pDlgroup->pmP, peTmp, pkDlkey->pePublicKey) );
        }
    }

cleanup:
    // The scratch buffer held private key material, so it is wiped before being freed
    if (pbScratch!=NULL)
    {
        SymCryptWipe( pbScratch, cbScratch );
        SymCryptCallbackFree( pbScratch );
    }
    return scError;
}
((pbPrivateKey==NULL) && (pbPublicKey==NULL)) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Ensure caller has specified what algorithm(s) the key will be used with + UINT32 algorithmFlags = SYMCRYPT_FLAG_DLKEY_DSA | SYMCRYPT_FLAG_DLKEY_DH; + // Make sure only allowed flags are specified + UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_NO_FIPS | SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION | algorithmFlags; + + if ( ( ( flags & ~allowedFlags ) != 0 ) || + ( ( flags & algorithmFlags ) == 0 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Extra sanity checks when running with FIPS + // Either Dlgroup is named SafePrime group and key is for DH, + // or Dlgroup is not named SafePrime group and key is for DSA + if ( ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) && + ( (pDlgroup->isSafePrimeGroup && (flags & SYMCRYPT_FLAG_DLKEY_DSA)) || + (!(pDlgroup->isSafePrimeGroup) && (flags & SYMCRYPT_FLAG_DLKEY_DH)) ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Check that minimal validation flag only specified with no fips + if ( ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) && + ( ( flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION ) != 0 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) != 0 ) + { + fValidatePublicKeyOrder = 0; + } + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // Thus the following calculation does not overflow cbScratch. 
+ // + cbScratch = SYMCRYPT_MAX( cbModElement + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS(pDlgroup->nDigitsOfP), + (2 * cbModElement) + SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP(pDlgroup->nDigitsOfP) ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + if ( pbPrivateKey != NULL ) + { + // + // Check the size of the imported private key to detect if it is mod P or mod Q + // If the group does not have a Q assume that the imported key is modulo P as + // it wouldn't help us assume otherwise (the bitsize of the private key should be kept + // secret from SC attacks). + // If the private key has had some non-default value set for nBitsPriv then the caller + // has explicitly opted in to more stringent range checking. + // + pkDlkey->fPrivateModQ = ( (pDlgroup->fHasPrimeQ) && + ((cbPrivateKey < pDlgroup->cbPrimeQ) || + ((cbPrivateKey == pDlgroup->cbPrimeQ) && (pDlgroup->cbPrimeQ < pDlgroup->cbPrimeP)) || + (pkDlkey->nBitsPriv != pDlgroup->nDefaultBitsPriv)) ); + + if ( pkDlkey->fPrivateModQ ) + { + nDigitsPriv = pDlgroup->nDigitsOfQ; + nBitsPriv = pDlgroup->nBitsOfQ; + + if ( pDlgroup->isSafePrimeGroup ) + { + nBitsPriv = pkDlkey->nBitsPriv; + } + } + else + { + nDigitsPriv = pDlgroup->nDigitsOfP; + nBitsPriv = pDlgroup->nBitsOfP; + } + + pkDlkey->piPrivateKey = SymCryptIntCreate( pkDlkey->pbPrivate, SymCryptSizeofIntFromDigits(nDigitsPriv), nDigitsPriv ); + + scError = SymCryptIntSetValue( + pbPrivateKey, + cbPrivateKey, + numFormat, + pkDlkey->piPrivateKey ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Perform range validation on imported Private key. 
+ // Check if Private key is 0 - perform unconditionally as it is cheap + // and it never makes sense for private key to be 0 intentionally + if ( SymCryptIntIsEqualUint32( pkDlkey->piPrivateKey, 0 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Continue range validation on imported Private key. + if ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) + { + // Ensure that Q is specified in the Dlgroup + if ( !pDlgroup->fHasPrimeQ ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // If nBitsPriv is specified, check if Private key is greater than or equal to 2^nBitsPriv + // Otherwise, check if Private key is greater than or equal to Q + if ( ( ( (nBitsPriv < pDlgroup->nBitsOfQ) && + SymCryptIntBitsizeOfValue( pkDlkey->piPrivateKey ) > nBitsPriv ) ) || + ( (nBitsPriv >= pDlgroup->nBitsOfQ) && + !SymCryptIntIsLessThan( pkDlkey->piPrivateKey, SymCryptIntFromModulus( pDlgroup->pmQ ) ) ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + + pkDlkey->fHasPrivateKey = TRUE; + } + + if ( pbPublicKey != NULL ) + { + scError = SymCryptModElementSetValue( + pbPublicKey, + cbPublicKey, + numFormat, + pDlgroup->pmP, + pkDlkey->pePublicKey, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Perform range validation on imported Public key. + if ( (flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION) == 0 ) + { + // Perform Public key validation. 
+ // Always perform range validation + // May also perform validation that Public key is in subgroup of order Q, depending on flags + scError = SymCryptDlkeyPerformPublicKeyValidation( + pkDlkey, + fValidatePublicKeyOrder, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + } + + // Calculating the public key if no key was provided + // or if needed for keypair regeneration validation + if ( (pbPublicKey==NULL) || + ( ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) && + (pbPrivateKey!=NULL) && (pbPublicKey!=NULL) ) ) + { + // Calculate the public key from the private key + pbScratchInternal = pbScratch; + cbScratchInternal = cbScratch; + + // By default calculate the public key directly where it will be persisted + peTmp = pkDlkey->pePublicKey; + + if ( pbPublicKey != NULL ) + { + // If doing regeneration validation calculate the public key in scratch + peTmp = SymCryptModElementCreate( pbScratchInternal, cbModElement, pDlgroup->pmP); + pbScratchInternal += cbModElement; + cbScratchInternal -= cbModElement; + } + + SymCryptModExp( + pDlgroup->pmP, + pDlgroup->peG, + pkDlkey->piPrivateKey, + nBitsPriv, // This is either bits of P, Q, or some caller-defined value i.e. public values + 0, // Side-channel safe + peTmp, + pbScratchInternal, + cbScratchInternal ); + + if ( pbPublicKey != NULL ) + { + if ( !SymCryptModElementIsEqual(pDlgroup->pmP, peTmp, pkDlkey->pePublicKey) ) + { + scError = SYMCRYPT_AUTHENTICATION_FAILURE; + goto cleanup; + } + } + else if ( ( flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION ) == 0 ) + { + // Perform Public key validation on generated public key. 
+ // Always perform range validation + // May also perform validation that Public key is in subgroup of order Q, depending on flags + scError = SymCryptDlkeyPerformPublicKeyValidation( + pkDlkey, + fValidatePublicKeyOrder, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + } + + pkDlkey->fAlgorithmInfo = flags; // We want to track all of the flags in the Dlkey + + if ( (flags & SYMCRYPT_FLAG_KEY_NO_FIPS) == 0 ) + { + if( ( flags & SYMCRYPT_FLAG_DLKEY_DSA ) != 0 ) + { + // Ensure DSA algorithm selftest is run before first use of DSA algorithm + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptDsaSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_DSA ); + + // PCT does not need to be run on import - mark it as done + pkDlkey->fAlgorithmInfo |= SYMCRYPT_PCT_DSA; + } + + if( ( flags & SYMCRYPT_FLAG_DLKEY_DH ) != 0 ) + { + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptDhSecretAgreementSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_DH ); + } + } + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeyGetValue( + _In_ PCSYMCRYPT_DLKEY pkDlkey, + _Out_writes_bytes_( cbPrivateKey ) + PBYTE pbPrivateKey, + SIZE_T cbPrivateKey, + _Out_writes_bytes_( cbPublicKey ) + PBYTE pbPublicKey, + SIZE_T cbPublicKey, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + UINT32 cbScratch = 0; + + PCSYMCRYPT_DLGROUP pDlgroup = pkDlkey->pDlgroup; + + UNREFERENCED_PARAMETER( flags ); + + if ( ((pbPrivateKey==NULL) && (cbPrivateKey!=0)) || + ((pbPublicKey==NULL) && (cbPublicKey!=0)) || + ((pbPrivateKey==NULL) && (pbPublicKey==NULL)) || + ((pbPrivateKey!=NULL) && !pkDlkey->fHasPrivateKey) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (pbPrivateKey != NULL) + { + scError = SymCryptIntGetValue( + pkDlkey->piPrivateKey, + pbPrivateKey, + cbPrivateKey, + 
numFormat ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + + if (pbPublicKey != NULL) + { + cbScratch = SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS(pDlgroup->nDigitsOfP); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + scError = SymCryptModElementGetValue( + pDlgroup->pmP, + pkDlkey->pePublicKey, + pbPublicKey, + cbPublicKey, + numFormat, + pbScratch, + cbScratch ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDlkeyExtendKeyUsage( + _Inout_ PSYMCRYPT_DLKEY pkDlkey, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + // Ensure caller has specified what algorithm(s) the key will be used with + UINT32 algorithmFlags = SYMCRYPT_FLAG_DLKEY_DSA | SYMCRYPT_FLAG_DLKEY_DH; + + if ( ( ( flags & ~algorithmFlags ) != 0 ) || + ( ( flags & algorithmFlags ) == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pkDlkey->fAlgorithmInfo |= flags; + +cleanup: + return scError; +} diff --git a/libs/symcrypt/lib/dsa.c b/libs/symcrypt/lib/dsa.c new file mode 100644 index 00000000000..068d413c8a4 --- /dev/null +++ b/libs/symcrypt/lib/dsa.c @@ -0,0 +1,695 @@ +// +// dsa.c DSA functions +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// +// + +#include "precomp.h" + +// Truncating function according to the FIPS 186-4 standard +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDsaTruncateHash( + _In_ PCSYMCRYPT_DLGROUP pDlgroup, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + UINT32 flags, + _Out_ PSYMCRYPT_MODELEMENT peMsghash, + _Out_ PSYMCRYPT_INT piIntLarge, + _Out_ PSYMCRYPT_INT piIntQ, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + UNREFERENCED_PARAMETER( flags ); + + // Get the value of msghash into piIntLarge + scError = SymCryptIntSetValue( pbHashValue, cbHashValue, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piIntLarge ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Truncate the rightmost bits if the value exceeds the size of the modulus Q + if (SymCryptIntBitsizeOfValue(piIntLarge) > pDlgroup->nBitsOfQ) + { + SymCryptIntDivPow2( piIntLarge, SymCryptIntBitsizeOfValue(piIntLarge) - pDlgroup->nBitsOfQ, piIntLarge ); + } + + scError = SymCryptIntCopyMixedSize( piIntLarge, piIntQ ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + // This should never fail here as we truncated the IntLarge + goto cleanup; + } + + // Now we can call IntToModElement as they have the same digit size + SymCryptIntToModElement( piIntQ, pDlgroup->pmQ, peMsghash, pbScratch, cbScratch ); // msghash mod Q + +cleanup: + return scError; +} + +#define SYMCRYPT_MAX_DSA_SIGNATURE_COUNT (100) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDsaSignEx( + _In_ PCSYMCRYPT_DLKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_opt_ PCSYMCRYPT_INT piK, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + PBYTE pbScratchInternal = NULL; + SIZE_T cbScratchInternal = 0; + + SIZE_T cbScratchInputK = 0; // Extra scratch 
space needed if the caller specified a K + + PCSYMCRYPT_DLGROUP pDlgroup = pKey->pDlgroup; + UINT32 nDigitsOfP = pDlgroup->nDigitsOfP; + UINT32 nDigitsOfQ = pDlgroup->nDigitsOfQ; + + UINT32 ndIntLarge = 0; + + UINT32 cbIntLarge = 0; + UINT32 cbIntQ = 0; + UINT32 cbIntP = 0; + + UINT32 cbModelementP = 0; + UINT32 cbModelementQ = 0; + + // Helper Integers + PSYMCRYPT_INT piIntLarge = NULL; // Safe size for all caller specified sizes + PSYMCRYPT_INT piIntP = NULL; // Same number of digits as P + PSYMCRYPT_INT piIntQ = NULL; // Same number of digits as Q + + // Elements modulo P + PSYMCRYPT_MODELEMENT peRmodP = NULL; + + // Elements modulo Q + PSYMCRYPT_MODELEMENT peMsghash = NULL; + PSYMCRYPT_MODELEMENT peRmodQ = NULL; + PSYMCRYPT_MODELEMENT peK = NULL; + PSYMCRYPT_MODELEMENT peS = NULL; + + UINT32 signatureCount = 0; + + UNREFERENCED_PARAMETER( flags ); + + // Make sure that the key may be used in DSA + if ( ((pKey->fAlgorithmInfo & SYMCRYPT_FLAG_DLKEY_DSA) == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Make sure that the group and the key have all the + // information for dsa, i.e. 
prime q and private key + // modulo q, and we are not using a named DH safe-prime + // group + if ((!pDlgroup->fHasPrimeQ) || + (!pKey->fHasPrivateKey) || + (!pKey->fPrivateModQ) || + (pDlgroup->isSafePrimeGroup)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Calculate the digit size for the HashValue + ndIntLarge = SymCryptDigitsFromBits( (UINT32)cbHashValue * 8 ); + + // Calculate the sizes of temp objects + cbIntLarge = SymCryptSizeofIntFromDigits(ndIntLarge); + cbIntQ = SymCryptSizeofIntFromDigits(nDigitsOfQ); + cbIntP = SymCryptSizeofIntFromDigits(nDigitsOfP); + + cbModelementP = SymCryptSizeofModElementFromModulus( pDlgroup-> pmP ); + cbModelementQ = SymCryptSizeofModElementFromModulus( pDlgroup-> pmQ ); + + // Allocate scratch space + cbScratchInputK = (piK==NULL)?0:SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD(SymCryptIntDigitsizeOfObject(piK),nDigitsOfQ); + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // Thus the following calculation does not overflow cbScratch. 
+ // + cbScratch = cbIntLarge + cbIntQ + cbIntP + cbModelementP + 4*cbModelementQ + + SYMCRYPT_MAX( cbScratchInputK, + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigitsOfQ ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigitsOfP ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( nDigitsOfP ), + SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( nDigitsOfQ ) )))); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if (pbScratch==NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Create the objects + pbScratchInternal = pbScratch; + cbScratchInternal = cbScratch; + + piIntLarge = SymCryptIntCreate(pbScratchInternal, cbIntLarge, ndIntLarge); pbScratchInternal += cbIntLarge; cbScratchInternal -= cbIntLarge; + piIntQ = SymCryptIntCreate(pbScratchInternal, cbIntQ, nDigitsOfQ); pbScratchInternal += cbIntQ; cbScratchInternal -= cbIntQ; + piIntP = SymCryptIntCreate(pbScratchInternal, cbIntP, nDigitsOfP); pbScratchInternal += cbIntP; cbScratchInternal -= cbIntP; + + peRmodP = SymCryptModElementCreate(pbScratchInternal, cbModelementP, pDlgroup->pmP); pbScratchInternal += cbModelementP; cbScratchInternal -= cbModelementP; + + peMsghash = SymCryptModElementCreate(pbScratchInternal, cbModelementQ, pDlgroup->pmQ); pbScratchInternal += cbModelementQ; cbScratchInternal -= cbModelementQ; + peRmodQ = SymCryptModElementCreate(pbScratchInternal, cbModelementQ, pDlgroup->pmQ); pbScratchInternal += cbModelementQ; cbScratchInternal -= cbModelementQ; + peK = SymCryptModElementCreate(pbScratchInternal, cbModelementQ, pDlgroup->pmQ); pbScratchInternal += cbModelementQ; cbScratchInternal -= cbModelementQ; + peS = SymCryptModElementCreate(pbScratchInternal, cbModelementQ, pDlgroup->pmQ); pbScratchInternal += cbModelementQ; cbScratchInternal -= cbModelementQ; + + // Get the message into a modelement + scError = SymCryptDsaTruncateHash( + pDlgroup, + pbHashValue, + cbHashValue, + flags, + peMsghash, + piIntLarge, + piIntQ, + 
pbScratchInternal, + cbScratchInternal ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // + // Main loop: Stop when both R and S are not zero (unless a specific k is provided) + // + while( TRUE ) + { + if (piK==NULL) + { + SymCryptModSetRandom( + pDlgroup->pmQ, + peK, + SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE|SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE, + pbScratchInternal, + cbScratchInternal ); + + SymCryptModElementToInt( + pDlgroup->pmQ, + peK, + piIntQ, + pbScratchInternal, + cbScratchInternal ); + } + else + { + SymCryptIntDivMod( + piK, + SymCryptDivisorFromModulus( pDlgroup->pmQ ), + NULL, + piIntQ, + pbScratchInternal, + cbScratchInternal ); + + SymCryptIntToModElement( + piIntQ, + pDlgroup->pmQ, + peK, + pbScratchInternal, + cbScratchInternal ); + + // Make sure that the K passed in is not zero + if (SymCryptModElementIsZero(pDlgroup->pmQ, peK)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + + // Here piIntQ and peK hold the random exponent K + + // G^K mod P + SymCryptModExp( + pDlgroup->pmP, + pDlgroup->peG, + piIntQ, + pDlgroup->nBitsOfQ, + 0, // Side-channel safe + peRmodP, + pbScratchInternal, + cbScratchInternal ); + + // Convert to integer + SymCryptModElementToInt( + pDlgroup->pmP, + peRmodP, + piIntP, + pbScratchInternal, + cbScratchInternal ); + + // Convert to mod Q + SymCryptIntDivMod( + piIntP, + SymCryptDivisorFromModulus( pDlgroup->pmQ ), + NULL, + piIntQ, + pbScratchInternal, + cbScratchInternal ); + + // Convert to modelement + SymCryptIntToModElement( + piIntQ, + pDlgroup->pmQ, + peRmodQ, + pbScratchInternal, + cbScratchInternal ); + + // Invert k mod q + scError = SymCryptModInv( + pDlgroup->pmQ, + peK, + peK, // In place + 0, + pbScratchInternal, + cbScratchInternal ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Get the private key X to modelement + // *** We are sure here that the digit + // size of it is nDigitsOfQ + SymCryptIntToModElement( + pKey->piPrivateKey, + pDlgroup->pmQ, + 
peS, + pbScratchInternal, + cbScratchInternal ); + + // X*R + SymCryptModMul( + pDlgroup->pmQ, + peS, + peRmodQ, + peS, + pbScratchInternal, + cbScratchInternal ); + + // H(m)+X*R + SymCryptModAdd( + pDlgroup->pmQ, + peS, + peMsghash, + peS, + pbScratchInternal, + cbScratchInternal ); + + // S = k^{-1}*(H(m)+X*R) + SymCryptModMul( + pDlgroup->pmQ, + peK, + peS, + peS, + pbScratchInternal, + cbScratchInternal ); + + if ( !( SymCryptModElementIsZero( pDlgroup->pmQ, peRmodQ ) | + SymCryptModElementIsZero( pDlgroup->pmQ, peS ) ) ) + { + break; + } + + if (piK != NULL) + { + // piK resulted in 0 signature + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + signatureCount++; + if ( signatureCount >= SYMCRYPT_MAX_DSA_SIGNATURE_COUNT ) + { + // We have not generated a non-zero signature after SYMCRYPT_MAX_DSA_SIGNATURE_COUNT attempts; + // Something is wrong with the group setup + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + + // Output R + scError = SymCryptModElementGetValue( + pDlgroup->pmQ, + peRmodQ, + pbSignature, + cbSignature / 2, + format, + pbScratchInternal, + cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Output S + scError = SymCryptModElementGetValue( + pDlgroup->pmQ, + peS, + pbSignature + cbSignature / 2, + cbSignature / 2, + format, + pbScratchInternal, + cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + if ( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + + if (scError != SYMCRYPT_NO_ERROR) + { + SymCryptWipe( pbSignature, cbSignature ); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDsaSign( + _In_ PCSYMCRYPT_DLKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ) +{ + return SymCryptDsaSignEx( pKey, 
pbHashValue, cbHashValue, NULL, format, flags, pbSignature, cbSignature ); +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDsaVerify( + _In_ PCSYMCRYPT_DLKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + BOOLEAN fValidSignature = FALSE; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + PBYTE pbScratchInternal = NULL; + SIZE_T cbScratchInternal = 0; + + PCSYMCRYPT_DLGROUP pDlgroup = pKey->pDlgroup; + UINT32 nDigitsOfP = pDlgroup->nDigitsOfP; + UINT32 nDigitsOfQ = pDlgroup->nDigitsOfQ; + + UINT32 ndIntLarge = 0; + + UINT32 cbIntLarge = 0; + UINT32 cbIntQ = 0; + UINT32 cbIntP = 0; + + UINT32 cbModelementP = 0; + UINT32 cbModelementQ = 0; + + PSYMCRYPT_MODELEMENT peBases[2] = { NULL, NULL }; // Array with pointers to base points + + // Helper Integers + PSYMCRYPT_INT piIntLarge = NULL; // Safe size for all caller specified sizes + PSYMCRYPT_INT piIntP = NULL; // Same number of digits as P + PSYMCRYPT_INT piIntQ[2] = { NULL, NULL }; // Same number of digits as Q + + // Elements modulo P + PSYMCRYPT_MODELEMENT peResP = NULL; + + // Elements modulo Q + PSYMCRYPT_MODELEMENT peR = NULL; + PSYMCRYPT_MODELEMENT peS = NULL; + PSYMCRYPT_MODELEMENT peT = NULL; // Temp + + UNREFERENCED_PARAMETER( flags ); + + // Make sure that the key may be used in DSA + if ( ((pKey->fAlgorithmInfo & SYMCRYPT_FLAG_DLKEY_DSA) == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Make sure that the group has a prime q, and we are not using a named DH safe-prime group + if (!pDlgroup->fHasPrimeQ || pDlgroup->isSafePrimeGroup) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Calculate the digit sizes + ndIntLarge = SymCryptDigitsFromBits( (UINT32)cbHashValue * 8 ); + ndIntLarge = SYMCRYPT_MAX( ndIntLarge, SymCryptDigitsFromBits( 
(UINT32)cbSignature * 4 ) ); // pbSignature contains (R,S) + + // Calculate the sizes of temp objects + cbIntLarge = SymCryptSizeofIntFromDigits(ndIntLarge); + cbIntQ = SymCryptSizeofIntFromDigits(nDigitsOfQ); + cbIntP = SymCryptSizeofIntFromDigits(nDigitsOfP); + + cbModelementP = SymCryptSizeofModElementFromModulus( pDlgroup-> pmP ); + cbModelementQ = SymCryptSizeofModElementFromModulus( pDlgroup-> pmQ ); + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // Thus the following calculation does not overflow cbScratch. + // + cbScratch = cbIntLarge + cbIntP + 2*cbIntQ + cbModelementP + 3*cbModelementQ + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD(nDigitsOfP,nDigitsOfQ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigitsOfQ ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigitsOfP ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_MODMULTIEXP( SymCryptModulusDigitsizeOfObject(pDlgroup->pmP), 2, pDlgroup->nBitsOfQ ), + SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( nDigitsOfQ ) )))); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if (pbScratch==NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Create the objects + pbScratchInternal = pbScratch; + cbScratchInternal = cbScratch; + + piIntLarge = SymCryptIntCreate(pbScratchInternal, cbIntLarge, ndIntLarge); pbScratchInternal += cbIntLarge; cbScratchInternal -= cbIntLarge; + piIntP = SymCryptIntCreate(pbScratchInternal, cbIntP, nDigitsOfP); pbScratchInternal += cbIntP; cbScratchInternal -= cbIntP; + piIntQ[0] = SymCryptIntCreate(pbScratchInternal, cbIntQ, nDigitsOfQ); pbScratchInternal += cbIntQ; cbScratchInternal -= cbIntQ; + piIntQ[1] = SymCryptIntCreate(pbScratchInternal, cbIntQ, nDigitsOfQ); pbScratchInternal += cbIntQ; cbScratchInternal -= cbIntQ; + + peResP = SymCryptModElementCreate(pbScratchInternal, 
cbModelementP, pDlgroup->pmP); pbScratchInternal += cbModelementP; cbScratchInternal -= cbModelementP; + + peR = SymCryptModElementCreate(pbScratchInternal, cbModelementQ, pDlgroup->pmQ); pbScratchInternal += cbModelementQ; cbScratchInternal -= cbModelementQ; + peS = SymCryptModElementCreate(pbScratchInternal, cbModelementQ, pDlgroup->pmQ); pbScratchInternal += cbModelementQ; cbScratchInternal -= cbModelementQ; + peT = SymCryptModElementCreate(pbScratchInternal, cbModelementQ, pDlgroup->pmQ); pbScratchInternal += cbModelementQ; cbScratchInternal -= cbModelementQ; + + // Get R + scError = SymCryptIntSetValue( pbSignature, cbSignature / 2, format, piIntLarge ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Check if R is less than Q + if ( !SymCryptIntIsLessThan( piIntLarge, SymCryptIntFromModulus( pDlgroup->pmQ ) ) ) + { + goto cleanup; + } + + // R mod Q (use piIntQ[0] as temp space) + scError = SymCryptIntCopyMixedSize( piIntLarge, piIntQ[0] ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + // This should never fail here as we verified that IntLarge is less than Q + goto cleanup; + } + SymCryptIntToModElement( piIntQ[0], pDlgroup->pmQ, peR, pbScratchInternal, cbScratchInternal ); + + // Check if R is zero + if (SymCryptModElementIsZero( pDlgroup->pmQ, peR )) + { + goto cleanup; + } + + // Get S + scError = SymCryptIntSetValue( pbSignature + cbSignature / 2, cbSignature / 2, format, piIntLarge ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Check if S is less than Q + if ( !SymCryptIntIsLessThan( piIntLarge, SymCryptIntFromModulus( pDlgroup->pmQ ) ) ) + { + goto cleanup; + } + + // S mod Q (use piIntQ[0] as temp space) + scError = SymCryptIntCopyMixedSize( piIntLarge, piIntQ[0] ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + // This should never fail here as we verified that IntLarge is less than Q + goto cleanup; + } + SymCryptIntToModElement( piIntQ[0], pDlgroup->pmQ, peS, pbScratchInternal, cbScratchInternal ); + + // 
Check if S is zero + if (SymCryptModElementIsZero( pDlgroup->pmQ, peS )) + { + goto cleanup; + } + + // Calculate 1/S mod Q + // S is part of the signature and therefore not a secret. + // We mark it public to avoid the use of random blinding, which would require a source of randomness + // just to verify a DSA signature. + scError = SymCryptModInv( pDlgroup->pmQ, peS, peS, SYMCRYPT_FLAG_DATA_PUBLIC, pbScratchInternal, cbScratchInternal ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Get the message into a modelement + scError = SymCryptDsaTruncateHash( + pDlgroup, + pbHashValue, + cbHashValue, + flags, + peT, + piIntLarge, + piIntQ[0], + pbScratchInternal, + cbScratchInternal ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Calculate U1 = Hash(M)/S modQ + SymCryptModMul( + pDlgroup->pmQ, + peT, + peS, + peT, + pbScratchInternal, + cbScratchInternal ); + + // Convert U1 to integer + SymCryptModElementToInt( + pDlgroup->pmQ, + peT, + piIntQ[0], + pbScratchInternal, + cbScratchInternal ); + + // Calculate U2 = R/S modQ + SymCryptModMul( + pDlgroup->pmQ, + peR, + peS, + peT, + pbScratchInternal, + cbScratchInternal ); + + // Convert U2 to integer + SymCryptModElementToInt( + pDlgroup->pmQ, + peT, + piIntQ[1], + pbScratchInternal, + cbScratchInternal ); + + // Arrange the pointers for v = G^U1 * Y^U2 + peBases[0] = pDlgroup->peG; + peBases[1] = pKey->pePublicKey; + + // v = G^U1 * Y^U2 + scError = SymCryptModMultiExp( + pDlgroup->pmP, + peBases, + piIntQ, + 2, + pDlgroup->nBitsOfQ, + SYMCRYPT_FLAG_DATA_PUBLIC, + peResP, + pbScratchInternal, + cbScratchInternal ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Convert V to a modelement modulo Q + SymCryptModElementToInt( + pDlgroup->pmP, + peResP, + piIntP, + pbScratchInternal, + cbScratchInternal ); + SymCryptIntDivMod( + piIntP, + SymCryptDivisorFromModulus( pDlgroup->pmQ ), + NULL, + piIntQ[0], + pbScratchInternal, + cbScratchInternal ); + 
SymCryptIntToModElement( + piIntQ[0], + pDlgroup->pmQ, + peT, + pbScratchInternal, + cbScratchInternal ); + + // Comparison V = R + if (SymCryptModElementIsEqual( pDlgroup->pmQ, peT, peR )) + { + fValidSignature = TRUE; + } + + +cleanup: + + if (!fValidSignature) + { + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + } + + if ( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + + return scError; +} diff --git a/libs/symcrypt/lib/ec_dh.c b/libs/symcrypt/lib/ec_dh.c new file mode 100644 index 00000000000..e7b84078645 --- /dev/null +++ b/libs/symcrypt/lib/ec_dh.c @@ -0,0 +1,157 @@ +// +// ec_dh.c ECDH function +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// + +#include "precomp.h" + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDhSecretAgreement( + _In_ PCSYMCRYPT_ECKEY pkPrivate, + _In_ PCSYMCRYPT_ECKEY pkPublic, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + SIZE_T cbScratchInternal = 0; + PBYTE pCurr = NULL; + + PCSYMCRYPT_ECURVE pCurve = NULL; + + PSYMCRYPT_ECPOINT poQ = NULL; + PBYTE pbX = NULL; + + UINT32 cbQ = 0; + UINT32 cbX = 0; + + // Make sure that the keys may be used in ECDH + if ( ((pkPrivate->fAlgorithmInfo & SYMCRYPT_FLAG_ECKEY_ECDH) == 0) || + ((pkPublic->fAlgorithmInfo & SYMCRYPT_FLAG_ECKEY_ECDH) == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Make sure we only specify the correct flags and that + // there is a private key + if ( (flags != 0) || + (!pkPrivate->hasPrivateKey) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Check that the curve is the same for both keys + if ( SymCryptEcurveIsSame( pkPrivate->pCurve, pkPublic->pCurve ) ) + { + pCurve = pkPrivate->pCurve; + } + else + { + scError = SYMCRYPT_INVALID_ARGUMENT; + 
goto cleanup; + } + + // Objects and scratch space size calculation + cbQ = SymCryptSizeofEcpointFromCurve( pCurve ); + cbX = SymCryptEcurveSizeofFieldElement( pCurve ); + + // Check the output buffer has the correct size + if (cbAgreedSecret != cbX) + { + scError = SYMCRYPT_WRONG_BLOCK_SIZE; + goto cleanup; + } + + cbScratchInternal = SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS(pCurve), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( pCurve ), + SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) )); + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // Thus the following calculation does not overflow cbScratch. + // + cbScratch = cbScratchInternal + cbQ + cbX; + + // Scratch space allocation + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if ( pbScratch == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Creating temporaries + pCurr = pbScratch + cbScratchInternal; + poQ = SymCryptEcpointCreate( pCurr, cbQ, pCurve ); + pCurr += cbQ; + pbX = pCurr; + + SYMCRYPT_ASSERT( poQ != NULL); + + // Make sure that the public key is not the zero point + // No need to check that the point is on the curve; that check is done when the + // public key is created. + if (SymCryptEcpointIsZero(pCurve, pkPublic->poPublicKey, pbScratch, cbScratchInternal)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Calculate the secret + // Always do low order clearing by multiplying by the cofactor. + // Note: the internal format of piPrivateKey is "DivH", so we + // get the correct result. 
+ scError = SymCryptEcpointScalarMul( + pCurve, + pkPrivate->piPrivateKey, + pkPublic->poPublicKey, + SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL, + poQ, + pbScratch, + cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Check if the result is the identity point + if ( SymCryptEcpointIsZero( + pCurve, + poQ, + pbScratch, + cbScratchInternal ) ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + // Get the x from poQ + scError = SymCryptEcpointGetValue( pCurve, poQ, format, SYMCRYPT_ECPOINT_FORMAT_X, pbX, cbX, 0, pbScratch, cbScratchInternal); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Store it in the destination + memcpy( pbAgreedSecret, pbX, cbX); + +cleanup: + if ( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + + return scError; +} diff --git a/libs/symcrypt/lib/ec_dispatch.c b/libs/symcrypt/lib/ec_dispatch.c new file mode 100644 index 00000000000..02229ac5efc --- /dev/null +++ b/libs/symcrypt/lib/ec_dispatch.c @@ -0,0 +1,300 @@ +// +// ec_dispatch.c Dispatch file for elliptic curve crypto functions +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// +// + +#include "precomp.h" + +// Table with all the pointers to SYMCRYPT_ECURVE_FUNCTIONS +const SYMCRYPT_ECURVE_FUNCTIONS SymCryptEcurveDispatchTable[] = +{ + // NULL Type + { + NULL, // SymCryptEcpointSetZeroNotImplemented, + NULL, // SymCryptEcpointSetDistinguishedPointNotImplemented, + NULL, // SymCryptEcpointSetRandomNotImplemented, + NULL, // SymCryptEcpointIsEqualNotImplemented, + NULL, // SymCryptEcpointIsZeroNotImplemented, + NULL, // SymCryptEcpointOnCurveNotImplemented, + NULL, // SymCryptEcpointAddNotImplemented, + NULL, // SymCryptEcpointAddDiffNonZeroNotImplemented, + NULL, // SymCryptEcpointDoubleNotImplemented, + NULL, // SymCryptEcpointNegateNotImplemented, + NULL, // SymCryptEcpointScalarMulNotImplemented, + NULL, // SymCryptEcpointMultiScalarMulNotImplemented, + NULL, // SymCryptEcurveFillScratchSpacesNotImplemented, + }, + // Short Weierstrass + { + SymCryptShortWeierstrassSetZero, + SymCryptShortWeierstrassSetDistinguished, + SymCryptEcpointGenericSetRandom, + SymCryptShortWeierstrassIsEqual, + SymCryptShortWeierstrassIsZero, + SymCryptShortWeierstrassOnCurve, + SymCryptShortWeierstrassAdd, + SymCryptShortWeierstrassAddDiffNonZero, + SymCryptShortWeierstrassDouble, + SymCryptShortWeierstrassNegate, + SymCryptEcpointScalarMulFixedWindow, + SymCryptEcpointMultiScalarMulWnafWithInterleaving, + SymCryptShortWeierstrassFillScratchSpaces, + }, + // Twisted Edwards + { + SymCryptTwistedEdwardsSetZero, + SymCryptTwistedEdwardsSetDistinguished, + SymCryptEcpointGenericSetRandom, + SymCryptTwistedEdwardsIsEqual, + SymCryptTwistedEdwardsIsZero, + SymCryptTwistedEdwardsOnCurve, + SymCryptTwistedEdwardsAdd, + SymCryptTwistedEdwardsAddDiffNonZero, + SymCryptTwistedEdwardsDouble, + SymCryptTwistedEdwardsNegate, + SymCryptEcpointScalarMulFixedWindow, + SymCryptEcpointMultiScalarMulWnafWithInterleaving, + SymCryptTwistedEdwardsFillScratchSpaces, + }, + // Montgomery + { + NULL, // SymCryptEcpointSetZeroNotImplemented, + SymCryptMontgomerySetDistinguished, 
+ SymCryptEcpointGenericSetRandom, + SymCryptMontgomeryIsEqual, + SymCryptMontgomeryIsZero, + NULL, // SymCryptEcpointOnCurveNotImplemented, + NULL, // SymCryptEcpointAddNotImplemented, + NULL, // SymCryptEcpointAddDiffNonZeroNotImplemented, + NULL, // SymCryptEcpointDoubleNotImplemented, + NULL, // SymCryptEcpointNegateNotImplemented, + SymCryptMontgomeryPointScalarMul, + NULL, // SymCryptEcpointMultiScalarMulNotImplemented, + SymCryptMontgomeryFillScratchSpaces, + }, + // Short Weierstrass with A==-3 + { + SymCryptShortWeierstrassSetZero, + SymCryptShortWeierstrassSetDistinguished, + SymCryptEcpointGenericSetRandom, + SymCryptShortWeierstrassIsEqual, + SymCryptShortWeierstrassIsZero, + SymCryptShortWeierstrassOnCurve, + SymCryptShortWeierstrassAdd, + SymCryptShortWeierstrassAddDiffNonZero, + SymCryptShortWeierstrassDoubleSpecializedAm3, + SymCryptShortWeierstrassNegate, + SymCryptEcpointScalarMulFixedWindow, + SymCryptEcpointMultiScalarMulWnafWithInterleaving, + SymCryptShortWeierstrassFillScratchSpaces, + }, + // Slack to make dispatch table size a power of 2 + {NULL,}, + {NULL,}, + {NULL,}, +}; + +#define SYMCRYPT_ECURVE_DISPATCH_TABLE_SIZE (sizeof( SymCryptEcurveDispatchTable )) + +// Ensure the table size is a power of 2 +C_ASSERT( (SYMCRYPT_ECURVE_DISPATCH_TABLE_SIZE & (SYMCRYPT_ECURVE_DISPATCH_TABLE_SIZE - 1)) == 0 ); + +// For now the ECurve type encodes the index into this dispatch table, so we just mask by the size of the table +// +// We could instead encode the absolute offset into the table in the type field (similar to the Modulus dispatch table), +// and this mask would be multiplied by SYMCRYPT_ECURVE_FUNCTIONS_SIZE +#define SYMCRYPT_ECURVE_DISPATCH_TABLE_MASK ((SYMCRYPT_ECURVE_DISPATCH_TABLE_SIZE / SYMCRYPT_ECURVE_FUNCTIONS_SIZE)-1) + +// We mask to constrain the unpredictable behaviour in the case of memory corruption; we do not want to interpret some data +// beyond the end of the dispatch table as function pointers +#define 
SYMCRYPT_ECURVE_CALL(v) (SymCryptEcurveDispatchTable[SYMCRYPT_FORCE_READ32(&(v)->type) & SYMCRYPT_ECURVE_DISPATCH_TABLE_MASK]). + +// We read the curve's internal type with a 32b read so it must be 4 bytes large +C_ASSERT(sizeof(((PCSYMCRYPT_ECURVE)0)->type) == 4); + +// Main functions +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptEcpointSetZero( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ECURVE_CALL( pCurve ) setZeroFunc( pCurve, poDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptEcpointSetDistinguishedPoint( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ECURVE_CALL( pCurve ) setDistinguishedFunc( pCurve, poDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptEcpointSetRandom( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_INT piScalar, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ECURVE_CALL( pCurve ) setRandomFunc( pCurve, piScalar, poDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +UINT32 +SYMCRYPT_CALL +SymCryptEcpointIsEqual( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + UINT32 flags, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SYMCRYPT_ECURVE_CALL( pCurve ) isEqualFunc( pCurve, poSrc1, poSrc2, flags, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +UINT32 +SYMCRYPT_CALL +SymCryptEcpointIsZero( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SYMCRYPT_ECURVE_CALL( pCurve ) isZeroFunc( pCurve, poSrc, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +UINT32 
+SYMCRYPT_CALL +SymCryptEcpointOnCurve( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SYMCRYPT_ECURVE_CALL( pCurve ) onCurveFunc( pCurve, poSrc, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptEcpointAdd( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 flags, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ECURVE_CALL( pCurve ) addFunc( pCurve, poSrc1, poSrc2, poDst, flags, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptEcpointAddDiffNonZero( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ECURVE_CALL( pCurve ) addDiffFunc( pCurve, poSrc1, poSrc2, poDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptEcpointDouble( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 flags, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ECURVE_CALL( pCurve ) doubleFunc( pCurve, poSrc, poDst, flags, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptEcpointNegate( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Inout_ PSYMCRYPT_ECPOINT poSrc, + UINT32 mask, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ECURVE_CALL( pCurve ) negateFunc( pCurve, poSrc, mask, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointScalarMul( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_INT piScalar, + _In_opt_ + PCSYMCRYPT_ECPOINT poSrc, + UINT32 flags, + _Out_ 
PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SYMCRYPT_ECURVE_CALL( pCurve ) scalarMulFunc( pCurve, piScalar, poSrc, flags, poDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointMultiScalarMul( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_INT * piSrcScalarArray, + _In_ PCSYMCRYPT_ECPOINT * poSrcEcpointArray, + UINT32 nPoints, + UINT32 flags, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SYMCRYPT_ECURVE_CALL( pCurve ) multiScalarMulFunc( pCurve, piSrcScalarArray, poSrcEcpointArray, nPoints, flags, poDst, pbScratch, cbScratch ); +} + +SYMCRYPT_DISABLE_CFG +VOID +SYMCRYPT_CALL +SymCryptEcurveFillScratchSpaces( + _Inout_ PSYMCRYPT_ECURVE pCurve ) +{ + SYMCRYPT_ECURVE_CALL( pCurve ) fillScratchSpacesFunc( pCurve ); +} diff --git a/libs/symcrypt/lib/ec_dsa.c b/libs/symcrypt/lib/ec_dsa.c new file mode 100644 index 00000000000..b9da0063e27 --- /dev/null +++ b/libs/symcrypt/lib/ec_dsa.c @@ -0,0 +1,694 @@ +// +// ec_dsa.c ECDSA functions +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// + +#include "precomp.h" + +/* + Sections 7.2.7 and 7.2.8 of the 29 August 2000 + IEEE Standard Specifications for Public-Key Cryptography, + IEEE Std 1363-2000, list DSA versions of the elliptic + curve signature and verification primitives. + This file has draft interfaces, + + 7.2.7 ECSP_DSA (pages 35-36) + + Inputs: + E -- An elliptic curve. + G (generator) -- A point on E of prime order r. + r -- See G. + s -- A secret exponent, 1 <= s < r (Private key) + msghash -- Hash of the message being signed. + + Outputs: + c, d -- Two integers in the interval [1, r-1] + + Algorithm: + 1) Generate random exponent k, 1 <= k < r, + to be kept from adversary. + Compute KG = k*G in E. + Note KG <> (point at infinity). 
+ 2) Convert x(KG) (an element of GF(q)) + to an integer FE2IP(x(KG)). + Let c = FE2IP(x(KG)) (mod r). + 3) Compute d = (msghash + s*c)/k (mod r). + 4) If c == 0 or d == 0, return to 1). + 5) Output c and d as integers. + + 7.2.8 ECVP_DSA + + Inputs: + E, G, r, msghash -- Same as in ECSP_DSA. + W -- The signer's public key. Equal to + s*G where s was passed to ECSP_DSA. + c, d -- A signature to be checked. + + Output: + TRUE if signature OK, else FALSE. + + Algorithm: + 1) If c or d is not in [1, r-1], return FALSE. + 2) Compute h1 = msghash/d (mod r) + and h2 = c/d (mod r). + 3) Compute P = h1*G + h2*W. + If P == (point at infinity), return FALSE. + 4) If c == FE2IP(x(P)) mod r, return TRUE. + Otherwise return FALSE. + +FE2IP is a P1363 function that casts a field element to an +integer (MSB_FIRST). See Section 5.5.5 of P1363. +*/ + +// +// Truncating function according to the standard or +// the original CNG implementation: +// +// Initially both implementations truncate the last **bytes** +// of the hash that are over the group byte length. Then if +// the bit length of the hash is still bigger than the bit +// length of the group order, ... +// +// 1. According to the X9.62 standard, we do an appropriate right shift to the entire hash. +// An example of this is a 160-bit hash, but a 113-bit subgroup order. For this case: +// a. We would truncate cbHash to (113 + 7) / 8 = 15 bytes. +// b. Since 15*8 = 120 > 113 we need to right-shift by 7 bits. +// 2. According to the original CNG implementation, we mask an appropriate number of the +// topmost bits of the hash. +// In the same example as before we would zero out the top 7 bits. 
+// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDsaTruncateHash( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + UINT32 flags, + _Out_ PSYMCRYPT_MODELEMENT peMsghash, + _Out_ PSYMCRYPT_INT piTmp, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 uiBitsizeOfTmp = 0; + UINT32 uiBitsizeOfGroup = 0; + + // Make sure that only the correct flags are set + if ( (flags & ~SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION) != 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Get the bitsize of the group order + uiBitsizeOfGroup = SymCryptEcurveBitsizeofGroupOrder( pCurve ); + + // Truncate the last bytes of the hash + if (cbHashValue*8 > uiBitsizeOfGroup) + { + cbHashValue = (uiBitsizeOfGroup + 7)/8; + } + + // Get the value of msghash + scError = SymCryptIntSetValue( pbHashValue, cbHashValue, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piTmp ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Get the bit size of the hash + uiBitsizeOfTmp = (UINT32)cbHashValue * 8; + + // If SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION is set, we don't do hash truncation. + // The caller can do their own truncation before calling into Symcrypt. 
+ if ( ( flags & SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION ) == 0) + { + // ******** Standard truncation ************** + // Shift right if needed + if ( uiBitsizeOfTmp > uiBitsizeOfGroup ) + { + SymCryptIntDivPow2( piTmp, uiBitsizeOfTmp - uiBitsizeOfGroup, piTmp ); + } + } + + SymCryptIntToModElement( piTmp, pCurve->GOrd, peMsghash, pbScratch, cbScratch ); // msghash mod r + +cleanup: + return scError; +} + +#define SYMCRYPT_MAX_ECDSA_SIGNATURE_COUNT (100) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDsaSignEx( + _In_ PCSYMCRYPT_ECKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_opt_ PCSYMCRYPT_INT piK, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + SIZE_T cbScratchInternal = 0; + PBYTE pCurr = NULL; + + PCSYMCRYPT_ECURVE pCurve = pKey->pCurve; + + PSYMCRYPT_INT piTmp = NULL; + PSYMCRYPT_INT piMul = NULL; + PSYMCRYPT_ECPOINT poKG = NULL; + + PSYMCRYPT_MODELEMENT peMsghash = NULL; + PSYMCRYPT_MODELEMENT peSigC = NULL; + PSYMCRYPT_MODELEMENT peSigD = NULL; + PSYMCRYPT_MODELEMENT peTmp = NULL; + + PBYTE pbX = NULL; + + UINT32 nDigitsInt = 0; + UINT32 nDigitsMul = 0; + + UINT32 cbInt = 0; + UINT32 cbMul = 0; + UINT32 cbKG = 0; + UINT32 cbRs = 0; + UINT32 cbX = 0; + + UINT32 signatureCount = 0; + UINT32 allowedFlags = SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION | SYMCRYPT_FLAG_DATA_PUBLIC; + UINT32 publicFlag = flags & SYMCRYPT_FLAG_DATA_PUBLIC; + UINT32 truncationFlag = flags & SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION; + + // Make sure that the key may be used in ECDSA + if ( ((pKey->fAlgorithmInfo & SYMCRYPT_FLAG_ECKEY_ECDSA) == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Make sure only allowed flags are specified and + // there is a private key + if ( ((flags & ~(allowedFlags)) != 0) || + (!pKey->hasPrivateKey) ) + { + scError = 
SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Calculating the digits for the temporary integers + nDigitsInt = pCurve->GOrdDigits; + + nDigitsMul = SymCryptEcurveDigitsofScalarMultiplier(pCurve); + + // Objects and scratch space size calculation + cbInt = SymCryptSizeofIntFromDigits( nDigitsInt ); + cbMul = SymCryptSizeofIntFromDigits( nDigitsMul ); + cbKG = SymCryptSizeofEcpointFromCurve( pCurve ); + cbRs = SymCryptSizeofModElementFromModulus( pCurve->GOrd ); + cbX = SymCryptEcurveSizeofFieldElement( pCurve ); + + cbScratchInternal = SYMCRYPT_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS( pCurve ); + cbScratchInternal = SYMCRYPT_MAX( cbScratchInternal, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->GOrdDigits ) ); + cbScratchInternal = SYMCRYPT_MAX( cbScratchInternal, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) ); + cbScratchInternal = SYMCRYPT_MAX( cbScratchInternal, SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( pCurve->GOrdDigits ) ); + cbScratchInternal = SYMCRYPT_MAX( cbScratchInternal, SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) ); + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // Thus the following calculation does not overflow cbScratch. 
+ // + cbScratch = cbScratchInternal + cbInt + cbMul + cbKG + 4*cbRs + cbX; + + // Scratch space allocation + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if ( pbScratch == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Creating temporaries + pCurr = pbScratch + cbScratchInternal; + piTmp = SymCryptIntCreate( pCurr, cbInt, nDigitsInt ); + pCurr += cbInt; + piMul = SymCryptIntCreate( pCurr, cbMul, nDigitsMul ); + pCurr += cbMul; + poKG = SymCryptEcpointCreate( pCurr, cbKG, pCurve ); + pCurr += cbKG; + peMsghash = SymCryptModElementCreate( pCurr, cbRs, pCurve->GOrd ); + pCurr += cbRs; + peSigC = SymCryptModElementCreate( pCurr, cbRs, pCurve->GOrd ); + pCurr += cbRs; + peSigD = SymCryptModElementCreate( pCurr, cbRs, pCurve->GOrd ); + pCurr += cbRs; + peTmp = SymCryptModElementCreate( pCurr, cbRs, pCurve->GOrd ); + pCurr += cbRs; + pbX = pCurr; + + SYMCRYPT_ASSERT( piTmp != NULL); + SYMCRYPT_ASSERT( piMul != NULL); + SYMCRYPT_ASSERT( poKG != NULL); + SYMCRYPT_ASSERT( peMsghash != NULL); + SYMCRYPT_ASSERT( peSigC != NULL); + SYMCRYPT_ASSERT( peSigD != NULL); + SYMCRYPT_ASSERT( peTmp != NULL); + + // Truncate the message according to the flags + scError = SymCryptEcDsaTruncateHash( + pCurve, + pbHashValue, + cbHashValue, + truncationFlag, + peMsghash, + piTmp, + pbScratch, + cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // + // Main loop: Stop when both c and d are not zero (unless a specific k is provided) + // + while( TRUE ) + { + if ( piK == NULL ) + { + SymCryptEcpointSetRandom( pCurve, piMul, poKG, pbScratch, cbScratchInternal ); // Generate k and k*G + SymCryptIntToModElement( piMul, pCurve->GOrd, peTmp, pbScratch, cbScratchInternal ); + } + else + { + // Ensure that piK is in the range [1, GOrd-1] + if( SymCryptIntIsEqualUint32( piK, 0 ) || + !SymCryptIntIsLessThan( piK, SymCryptIntFromModulus( pCurve->GOrd ) ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + 
SymCryptIntCopy( piK, piMul ); + SymCryptIntToModElement( piMul, pCurve->GOrd, peTmp, pbScratch, cbScratchInternal ); + + scError = SymCryptEcpointScalarMul( pCurve, piMul, NULL, 0, poKG, pbScratch, cbScratchInternal ); // Generate k*G + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + scError = SymCryptModInv( pCurve->GOrd, peTmp, peTmp, publicFlag, pbScratch, cbScratchInternal ); // Invert k + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Get the x coordinates from KG + scError = SymCryptEcpointGetValue( + pCurve, + poKG, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + SYMCRYPT_ECPOINT_FORMAT_X, + pbX, + cbX, + publicFlag, + pbScratch, + cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Store c = x(KG) as an integer (scratch is only the first cbScratchInternal bytes of pbScratch; the temporaries live after it) + scError = SymCryptModElementSetValue( pbX, cbX, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pCurve->GOrd, peSigC, pbScratch, cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Move the private key into peSigD + SymCryptIntToModElement( pKey->piPrivateKey, pCurve->GOrd, peSigD, pbScratch, cbScratchInternal ); + + // Multiply the private key by h since its internal format is "DivH" + for (UINT32 i=0; i<pCurve->coFactorPower; i++) + { + SymCryptModAdd( pCurve->GOrd, peSigD, peSigD, peSigD, pbScratch, cbScratchInternal ); + } + + SymCryptModMul( pCurve->GOrd, peSigC, peSigD, peSigD, pbScratch, cbScratchInternal ); // s * c + SymCryptModAdd( pCurve->GOrd, peMsghash, peSigD, peSigD, pbScratch, cbScratchInternal ); // msghash + s*c + SymCryptModMul( pCurve->GOrd, peSigD, peTmp, peSigD, pbScratch, cbScratchInternal ); // ( msghash + s*c ) / k + + if ( !( SymCryptModElementIsZero( pCurve->GOrd, peSigC ) | + SymCryptModElementIsZero( pCurve->GOrd, peSigD ) ) ) + { + break; + } + + if (piK != NULL) + { + // piK resulted in 0 signature + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + signatureCount++; + if ( signatureCount >= 
SYMCRYPT_MAX_ECDSA_SIGNATURE_COUNT ) + { + // We have not generated a non-zero signature after SYMCRYPT_MAX_ECDSA_SIGNATURE_COUNT attempts; + // Something is wrong with the group setup + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + + // Output c + scError = SymCryptModElementGetValue( pCurve->GOrd, peSigC, pbSignature, cbSignature / 2, format, pbScratch, cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Output d + scError = SymCryptModElementGetValue( pCurve->GOrd, peSigD, pbSignature + cbSignature / 2, cbSignature / 2, format, pbScratch, cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + if ( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + + if (scError != SYMCRYPT_NO_ERROR) + { + SymCryptWipe( pbSignature, cbSignature ); + } + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDsaSign( + _In_ PCSYMCRYPT_ECKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ) +{ + // Make sure that only the correct flags are set + if ( (flags & ~SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION) != 0 ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + // We must have a private key to perform PCT or signature + if( !pKey->hasPrivateKey || !(pKey->fAlgorithmInfo & SYMCRYPT_FLAG_ECKEY_ECDSA) ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + // If the key was generated in SymCrypt and has not yet had a PCT performed - perform PCT before first use + SYMCRYPT_RUN_KEY_GEN_PCT( + SymCryptEcDsaPct, + pKey, + SYMCRYPT_PCT_ECDSA ); + + return SymCryptEcDsaSignEx( pKey, pbHashValue, cbHashValue, NULL, format, flags, pbSignature, cbSignature ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDsaVerify( + _In_ PCSYMCRYPT_ECKEY pKey, + _In_reads_bytes_( cbHashValue ) PCBYTE 
pbHashValue, + SIZE_T cbHashValue, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + SYMCRYPT_NUMBER_FORMAT format, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + SIZE_T cbScratchInternal = 0; + PBYTE pCurr = NULL; + BOOLEAN fValidSignature = FALSE; + + PCSYMCRYPT_ECURVE pCurve = pKey->pCurve; + + PSYMCRYPT_INT piTmp = NULL; + PSYMCRYPT_INT piMul1 = NULL; + PSYMCRYPT_INT piMul2 = NULL; + PSYMCRYPT_ECPOINT poQ1 = NULL; + PSYMCRYPT_ECPOINT poQ2 = NULL; + + PSYMCRYPT_MODELEMENT peMsghash = NULL; + PSYMCRYPT_MODELEMENT peSigC = NULL; + PSYMCRYPT_MODELEMENT peSigD = NULL; + PSYMCRYPT_MODELEMENT peTmp = NULL; + + PBYTE pbX = NULL; + PCSYMCRYPT_ECPOINT poTable[2] = { 0 }; + PCSYMCRYPT_INT piTable[2] = { 0 }; + + UINT32 nDigitsInt = 0; + UINT32 nDigitsMul = 0; + + UINT32 cbInt = 0; + UINT32 cbMul = 0; + UINT32 cbKG = 0; + UINT32 cbRs = 0; + UINT32 cbX = 0; + + // Make sure that the key may be used in ECDSA + if ( ((pKey->fAlgorithmInfo & SYMCRYPT_FLAG_ECKEY_ECDSA) == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Make sure that only the correct flags are set + if ( (flags & ~SYMCRYPT_FLAG_ECDSA_NO_TRUNCATION) != 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Calculating the digits for the temporary integer + nDigitsInt = SYMCRYPT_MAX( pCurve->FModDigits, pCurve->GOrdDigits ); + nDigitsInt = SYMCRYPT_MAX( nDigitsInt, SymCryptDigitsFromBits( (UINT32)cbSignature * 4 ) ); // pbSignature contains (c,d) + + nDigitsMul = SymCryptEcurveDigitsofScalarMultiplier(pCurve); + + // Objects and scratch space size calculation + cbInt = SymCryptSizeofIntFromDigits( nDigitsInt ); + cbMul = SymCryptSizeofIntFromDigits( nDigitsMul ); + cbKG = SymCryptSizeofEcpointFromCurve( pCurve ); + cbRs = SymCryptSizeofModElementFromModulus( pCurve->GOrd ); + cbX = SymCryptEcurveSizeofFieldElement( pCurve ); + + cbScratchInternal = 
SYMCRYPT_SCRATCH_BYTES_FOR_MULTI_SCALAR_ECURVE_OPERATIONS( pCurve, 2 ); + cbScratchInternal = SYMCRYPT_MAX( cbScratchInternal, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->GOrdDigits ) ); + cbScratchInternal = SYMCRYPT_MAX( cbScratchInternal, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) ); + cbScratchInternal = SYMCRYPT_MAX( cbScratchInternal, SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( pCurve->GOrdDigits ) ); + cbScratchInternal = SYMCRYPT_MAX( cbScratchInternal, SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) ); + cbScratchInternal = SYMCRYPT_MAX( cbScratchInternal, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ); + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // Thus the following calculation does not overflow cbScratch. + // + cbScratch = cbScratchInternal + cbInt + 2*cbMul + 2*cbKG + 4*cbRs + cbX; + + // Scratch space allocation + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if ( pbScratch == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Creating temporaries + pCurr = pbScratch + cbScratchInternal; + piTmp = SymCryptIntCreate( pCurr, cbInt, nDigitsInt ); + pCurr += cbInt; + piMul1 = SymCryptIntCreate( pCurr, cbMul, nDigitsMul ); + pCurr += cbMul; + piMul2 = SymCryptIntCreate( pCurr, cbMul, nDigitsMul ); + pCurr += cbMul; + poQ1 = SymCryptEcpointCreate( pCurr, cbKG, pCurve ); + pCurr += cbKG; + poQ2 = SymCryptEcpointCreate( pCurr, cbKG, pCurve ); + pCurr += cbKG; + peMsghash = SymCryptModElementCreate( pCurr, cbRs, pCurve->GOrd ); + pCurr += cbRs; + peSigC = SymCryptModElementCreate( pCurr, cbRs, pCurve->GOrd ); + pCurr += cbRs; + peSigD = SymCryptModElementCreate( pCurr, cbRs, pCurve->GOrd ); + pCurr += cbRs; + peTmp = SymCryptModElementCreate( pCurr, cbRs, pCurve->GOrd ); + pCurr += cbRs; + pbX = pCurr; + + 
SYMCRYPT_ASSERT( piTmp != NULL); + SYMCRYPT_ASSERT( piMul1 != NULL); + SYMCRYPT_ASSERT( piMul2 != NULL); + SYMCRYPT_ASSERT( poQ1 != NULL); + SYMCRYPT_ASSERT( poQ2 != NULL); + SYMCRYPT_ASSERT( peMsghash != NULL); + SYMCRYPT_ASSERT( peSigC != NULL); + SYMCRYPT_ASSERT( peSigD != NULL); + SYMCRYPT_ASSERT( peTmp != NULL); + + // Get c + scError = SymCryptIntSetValue( pbSignature, cbSignature / 2, format, piTmp ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Check if c is less than r + if ( !SymCryptIntIsLessThan( piTmp, SymCryptIntFromModulus( pCurve->GOrd ) ) ) + { + goto cleanup; + } + + // c mod r + SymCryptIntToModElement( piTmp, pCurve->GOrd, peSigC, pbScratch, cbScratchInternal ); + + // Check if c is zero + if (SymCryptModElementIsZero( pCurve->GOrd, peSigC )) + { + goto cleanup; + } + + // Get d + scError = SymCryptIntSetValue( pbSignature + cbSignature / 2, cbSignature / 2, format, piTmp ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Check if d is less than r + if ( !SymCryptIntIsLessThan( piTmp, SymCryptIntFromModulus( pCurve->GOrd ) ) ) + { + goto cleanup; + } + + // d mod r + SymCryptIntToModElement( piTmp, pCurve->GOrd, peSigD, pbScratch, cbScratchInternal ); + + // Check if d is zero + if (SymCryptModElementIsZero( pCurve->GOrd, peSigD )) + { + goto cleanup; + } + + // Calculate 1/d mod r + // The D value is not secret; it is part of the signature. + // We mark it public to avoid the use of random blinding, which would require a source of randomness + // just to verify an ECDSA signature. 
+ scError = SymCryptModInv( pCurve->GOrd, peSigD, peSigD, SYMCRYPT_FLAG_DATA_PUBLIC, pbScratch, cbScratchInternal ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Truncate the message according to the flags + scError = SymCryptEcDsaTruncateHash( + pCurve, + pbHashValue, + cbHashValue, + flags, + peMsghash, + piTmp, + pbScratch, + cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptModMul( pCurve->GOrd, peMsghash, peSigD, peMsghash, pbScratch, cbScratchInternal ); // msghash / d = h1 + SymCryptModMul( pCurve->GOrd, peSigC, peSigD, peTmp, pbScratch, cbScratchInternal ); // c / d = h2 + + SymCryptModElementToInt( pCurve->GOrd, peMsghash, piMul1, pbScratch, cbScratchInternal ); + SymCryptModElementToInt( pCurve->GOrd, peTmp, piMul2, pbScratch, cbScratchInternal ); + + // h1*G + h2*W + piTable[0] = piMul1; + piTable[1] = piMul2; + + poTable[0] = NULL; // The first base point is the generator G of the group + poTable[1] = pKey->poPublicKey; + + scError = SymCryptEcpointMultiScalarMul( pCurve, piTable, poTable, 2, SYMCRYPT_FLAG_DATA_PUBLIC, poQ1, pbScratch, cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Check for point at infinity + if ( SymCryptEcpointIsZero( pCurve, poQ1, pbScratch, cbScratchInternal ) ) + { + goto cleanup; + } + + // Get the x from poQ1 + scError = SymCryptEcpointGetValue( pCurve, poQ1, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, SYMCRYPT_ECPOINT_FORMAT_X, pbX, cbX, SYMCRYPT_FLAG_DATA_PUBLIC, pbScratch, cbScratchInternal); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Store it in a big enough INT + scError = SymCryptIntSetValue( pbX, cbX, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piTmp ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptIntToModElement( piTmp, pCurve->GOrd, peTmp, pbScratch, cbScratchInternal ); // x mod r + + // Comparison c = x + if (SymCryptModElementIsEqual( pCurve->GOrd, peSigC, peTmp )) + { + 
fValidSignature = TRUE; + } + +cleanup: + + if (!fValidSignature) + { + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + } + + if ( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + + return scError; +} diff --git a/libs/symcrypt/lib/ec_internal_curve_params.c b/libs/symcrypt/lib/ec_internal_curve_params.c new file mode 100644 index 00000000000..4c0301f1c68 --- /dev/null +++ b/libs/symcrypt/lib/ec_internal_curve_params.c @@ -0,0 +1,597 @@ +// +// ec_internal_curve_params.c Parameters for internally supported curves. +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// + +#include "precomp.h" + +// Do not delete the following preprocessor directive. +// It is used for folding the parameters. +#if 1 + +/*********************************** + * * + * NIST CURVES * + * * + ***********************************/ + +static const BYTE rgbNistP192[] = { + //dwVersion + 0x01, 0x00, 0x00, 0x00, + //dwCurveType + 0x01, 0x00, 0x00, 0x00, + //dwCurveGenerationAlgId + 0x00, 0x00, 0x00, 0x00, + //cbFieldLength + 0x18, 0x00, 0x00, 0x00, + //cbSubgroupOrder + 0x18, 0x00, 0x00, 0x00, + //cbCofactor + 0x01, 0x00, 0x00, 0x00, + //cbSeed + 0x00, 0x00, 0x00, 0x00, + //p + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + //A + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFC, + //B + 0x64, 0x21, 0x05, 0x19, 0xE5, 0x9C, 0x80, 0xE7, + 0x0F, 0xA7, 0xE9, 0xAB, 0x72, 0x24, 0x30, 0x49, + 0xFE, 0xB8, 0xDE, 0xEC, 0xC1, 0x46, 0xB9, 0xB1, + //x + 0x18, 0x8D, 0xA8, 0x0E, 0xB0, 0x30, 0x90, 0xF6, + 0x7C, 0xBF, 0x20, 0xEB, 0x43, 0xA1, 0x88, 0x00, + 0xf4, 0xFF, 0x0A, 0xFD, 0x82, 0xFF, 0x10, 0x12, + //y + 0x07, 0x19, 0x2B, 0x95, 0xFF, 0xC8, 0xDA, 0x78, + 0x63, 0x10, 0x11, 0xED, 0x6B, 0x24, 0xCD, 0xD5, + 0x73, 0xF9, 
0x77, 0xA1, 0x1E, 0x79, 0x48, 0x11, + //q + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x99, 0xDE, 0xF8, 0x36, + 0x14, 0x6B, 0xC9, 0xB1, 0xB4, 0xD2, 0x28, 0x31, + //h + 0x01 +}; + +static const BYTE rgbNistP224[] = { + //dwVersion + 0x01, 0x00, 0x00, 0x00, + //dwCurveType + 0x01, 0x00, 0x00, 0x00, + //dwCurveGenerationAlgId + 0x00, 0x00, 0x00, 0x00, + //cbFieldLength + 0x1C, 0x00, 0x00, 0x00, + //cbSubgroupOrder + 0x1C, 0x00, 0x00, 0x00, + //cbCofactor + 0x01, 0x00, 0x00, 0x00, + //cbSeed + 0x00, 0x00, 0x00, 0x00, + //p + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, + //A + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFE, + //B + 0xB4, 0x05, 0x0A, 0x85, 0x0C, 0x04, 0xB3, 0xAB, + 0xF5, 0x41, 0x32, 0x56, 0x50, 0x44, 0xB0, 0xB7, + 0xD7, 0xBF, 0xD8, 0xBA, 0x27, 0x0B, 0x39, 0x43, + 0x23, 0x55, 0xFF, 0xB4, + //x + 0xB7, 0x0E, 0x0C, 0xBD, 0x6B, 0xB4, 0xBF, 0x7F, + 0x32, 0x13, 0x90, 0xB9, 0x4A, 0x03, 0xC1, 0xD3, + 0x56, 0xC2, 0x11, 0x22, 0x34, 0x32, 0x80, 0xD6, + 0x11, 0x5C, 0x1D, 0x21, + //y + 0xBD, 0x37, 0x63, 0x88, 0xB5, 0xF7, 0x23, 0xFB, + 0x4C, 0x22, 0xDF, 0xE6, 0xCD, 0x43, 0x75, 0xA0, + 0x5A, 0x07, 0x47, 0x64, 0x44, 0xD5, 0x81, 0x99, + 0x85, 0x00, 0x7E, 0x34, + //q + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x16, 0xA2, + 0xE0, 0xB8, 0xF0, 0x3E, 0x13, 0xDD, 0x29, 0x45, + 0x5C, 0x5C, 0x2A, 0x3D, + //h + 0x01 +}; + +static const BYTE rgbNistP256[] = { + //dwVersion + 0x01, 0x00, 0x00, 0x00, + //dwCurveType + 0x01, 0x00, 0x00, 0x00, + //dwCurveGenerationAlgId + 0x00, 0x00, 0x00, 0x00, + //cbFieldLength + 0x20, 0x00, 0x00, 0x00, + //cbSubgroupOrder + 0x20, 0x00, 0x00, 0x00, + //cbCofactor + 0x01, 0x00, 0x00, 0x00, + //cbSeed + 0x00, 0x00, 0x00, 
0x00, + //p + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + //A + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFC, + //B + 0x5A, 0xC6, 0x35, 0xD8, 0xAA, 0x3A, 0x93, 0xE7, + 0xB3, 0xEB, 0xBD, 0x55, 0x76, 0x98, 0x86, 0xBC, + 0x65, 0x1D, 0x06, 0xB0, 0xCC, 0x53, 0xB0, 0xF6, + 0x3B, 0xCE, 0x3C, 0x3E, 0x27, 0xD2, 0x60, 0x4B, + //x + 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, + 0xF8, 0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40, 0xF2, + 0x77, 0x03, 0x7D, 0x81, 0x2D, 0xEB, 0x33, 0xA0, + 0xF4, 0xA1, 0x39, 0x45, 0xD8, 0x98, 0xC2, 0x96, + //y + 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F, 0x9B, + 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E, 0x16, + 0x2B, 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, + 0xCB, 0xB6, 0x40, 0x68, 0x37, 0xBF, 0x51, 0xF5, + //q + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xBC, 0xE6, 0xFA, 0xAD, 0xA7, 0x17, 0x9E, 0x84, + 0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63, 0x25, 0x51, + //h + 0x01 +}; + +static const BYTE rgbNistP384[] = { + //dwVersion + 0x01, 0x00, 0x00, 0x00, + //dwCurveType + 0x01, 0x00, 0x00, 0x00, + //dwCurveGenerationAlgId + 0x00, 0x00, 0x00, 0x00, + //cbFieldLength + 0x30, 0x00, 0x00, 0x00, + //cbSubgroupOrder + 0x30, 0x00, 0x00, 0x00, + //cbCofactor + 0x01, 0x00, 0x00, 0x00, + //cbSeed + 0x00, 0x00, 0x00, 0x00, + //p + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, + //A + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFC, + //B + 0xB3, 0x31, 0x2F, 0xA7, 0xE2, 0x3E, 0xE7, 0xE4, + 0x98, 0x8E, 0x05, 0x6B, 0xE3, 0xF8, 0x2D, 0x19, + 0x18, 0x1D, 0x9C, 0x6E, 0xFE, 0x81, 0x41, 0x12, + 0x03, 0x14, 0x08, 0x8F, 0x50, 0x13, 0x87, 0x5A, + 0xC6, 0x56, 0x39, 0x8D, 0x8A, 0x2E, 0xD1, 0x9D, + 0x2A, 0x85, 0xC8, 0xED, 0xD3, 0xEC, 0x2A, 0xEF, + //x + 0xAA, 0x87, 0xCA, 0x22, 0xBE, 0x8B, 0x05, 0x37, + 0x8E, 0xB1, 0xC7, 0x1E, 0xF3, 0x20, 0xAD, 0x74, + 0x6E, 0x1D, 0x3B, 0x62, 0x8B, 0xA7, 0x9B, 0x98, + 0x59, 0xF7, 0x41, 0xE0, 0x82, 0x54, 0x2A, 0x38, + 0x55, 0x02, 0xF2, 0x5D, 0xBF, 0x55, 0x29, 0x6C, + 0x3A, 0x54, 0x5E, 0x38, 0x72, 0x76, 0x0A, 0xB7, + //y + 0x36, 0x17, 0xDE, 0x4A, 0x96, 0x26, 0x2C, 0x6F, + 0x5D, 0x9E, 0x98, 0xBF, 0x92, 0x92, 0xDC, 0x29, + 0xF8, 0xF4, 0x1D, 0xBD, 0x28, 0x9A, 0x14, 0x7C, + 0xE9, 0xDA, 0x31, 0x13, 0xB5, 0xF0, 0xB8, 0xC0, + 0x0A, 0x60, 0xB1, 0xCE, 0x1D, 0x7E, 0x81, 0x9D, + 0x7A, 0x43, 0x1D, 0x7C, 0x90, 0xEA, 0x0E, 0x5F, + //q + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xC7, 0x63, 0x4D, 0x81, 0xF4, 0x37, 0x2D, 0xDF, + 0x58, 0x1A, 0x0D, 0xB2, 0x48, 0xB0, 0xA7, 0x7A, + 0xEC, 0xEC, 0x19, 0x6A, 0xCC, 0xC5, 0x29, 0x73, + //h + 0x01 +}; + +static const BYTE rgbNistP521[] = { + //dwVersion + 0x01, 0x00, 0x00, 0x00, + //dwCurveType + 0x01, 0x00, 0x00, 0x00, + //dwCurveGenerationAlgId + 0x00, 0x00, 0x00, 0x00, + //cbFieldLength + 0x42, 0x00, 0x00, 0x00, + //cbSubgroupOrder + 0x42, 0x00, 0x00, 0x00, + //cbCofactor + 0x01, 0x00, 0x00, 0x00, + //cbSeed + 0x00, 0x00, 0x00, 0x00, + //p + 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, + //A + 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFC, + //B + 0x00, 0x51, 0x95, 0x3E, 0xB9, 0x61, 0x8E, 0x1C, + 0x9A, 0x1F, 0x92, 0x9A, 0x21, 0xA0, 0xB6, 0x85, + 0x40, 0xEE, 0xA2, 0xDA, 0x72, 0x5B, 0x99, 0xB3, + 0x15, 0xF3, 0xB8, 0xB4, 0x89, 0x91, 0x8E, 0xF1, + 0x09, 0xE1, 0x56, 0x19, 0x39, 0x51, 0xEC, 0x7E, + 0x93, 0x7B, 0x16, 0x52, 0xC0, 0xBD, 0x3B, 0xB1, + 0xBF, 0x07, 0x35, 0x73, 0xDF, 0x88, 0x3D, 0x2C, + 0x34, 0xF1, 0xEF, 0x45, 0x1F, 0xD4, 0x6B, 0x50, + 0x3F, 0x00, + //x + 0x00, 0xC6, 0x85, 0x8E, 0x06, 0xB7, 0x04, 0x04, + 0xE9, 0xCD, 0x9E, 0x3E, 0xCB, 0x66, 0x23, 0x95, + 0xB4, 0x42, 0x9C, 0x64, 0x81, 0x39, 0x05, 0x3F, + 0xB5, 0x21, 0xF8, 0x28, 0xAF, 0x60, 0x6B, 0x4D, + 0x3D, 0xBA, 0xA1, 0x4B, 0x5E, 0x77, 0xEF, 0xE7, + 0x59, 0x28, 0xFE, 0x1D, 0xC1, 0x27, 0xA2, 0xFF, + 0xA8, 0xDE, 0x33, 0x48, 0xB3, 0xC1, 0x85, 0x6A, + 0x42, 0x9B, 0xF9, 0x7E, 0x7E, 0x31, 0xC2, 0xE5, + 0xBD, 0x66, + //y + 0x01, 0x18, 0x39, 0x29, 0x6A, 0x78, 0x9A, 0x3B, + 0xC0, 0x04, 0x5C, 0x8A, 0x5F, 0xB4, 0x2C, 0x7D, + 0x1B, 0xD9, 0x98, 0xF5, 0x44, 0x49, 0x57, 0x9B, + 0x44, 0x68, 0x17, 0xAF, 0xBD, 0x17, 0x27, 0x3E, + 0x66, 0x2C, 0x97, 0xEE, 0x72, 0x99, 0x5E, 0xF4, + 0x26, 0x40, 0xC5, 0x50, 0xB9, 0x01, 0x3F, 0xAD, + 0x07, 0x61, 0x35, 0x3C, 0x70, 0x86, 0xA2, 0x72, + 0xC2, 0x40, 0x88, 0xBE, 0x94, 0x76, 0x9F, 0xD1, + 0x66, 0x50, + //q + 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFA, 0x51, 0x86, 0x87, 0x83, 0xBF, 0x2F, + 0x96, 0x6B, 0x7F, 0xCC, 0x01, 0x48, 0xF7, 0x09, + 0xA5, 0xD0, 0x3B, 0xB5, 0xC9, 0xB8, 0x89, 0x9C, + 0x47, 0xAE, 0xBB, 0x6F, 0xB7, 0x1E, 0x91, 0x38, + 0x64, 0x09, + //h + 0x01 +}; + +/***************************************** +* * +* TWISTED EDWARDS CURVES * +* * +******************************************/ +static const BYTE rgbNumsP256t1[] = { + //dwVersion + 0x01, 0x00, 0x00, 0x00, + //dwCurveType + 0x02, 0x00, 0x00, 0x00, + //dwCurveGenerationAlgId + 0x00, 0x00, 0x00, 0x00, + //cbFieldLength + 0x20, 0x00, 0x00, 0x00, + //cbSubgroupOrder + 0x20, 0x00, 0x00, 0x00, + //cbCofactor + 0x01, 0x00, 0x00, 0x00, + //cbSeed + 0x00, 0x00, 0x00, 0x00, + //p + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x43, + //A + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x42, + //d + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3B, 0xEE, + //x + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0D, + //y + 0x7D, 0x0A, 0xB4, 0x1E, 0x2A, 0x12, 0x76, 0xDB, + 0xA3, 0xD3, 0x30, 0xB3, 0x9F, 0xA0, 0x46, 0xBF, + 0xBE, 0x2A, 0x6D, 0x63, 0x82, 0x4D, 0x30, 0x3F, + 0x70, 0x7F, 0x6F, 0xB5, 0x33, 0x1C, 0xAD, 0xBA, + //q + 0x3F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xBE, 0x6A, 0xA5, 0x5A, 0xD0, 0xA6, 0xBC, 0x64, 
+ 0xE5, 0xB8, 0x4E, 0x6F, 0x11, 0x22, 0xB4, 0xAD, + //h + 0x04 +}; + +static const BYTE rgbNumsP384t1[] = { + //dwVersion + 0x01, 0x00, 0x00, 0x00, + //dwCurveType + 0x02, 0x00, 0x00, 0x00, + //dwCurveGenerationAlgId + 0x00, 0x00, 0x00, 0x00, + //cbFieldLength + 0x30, 0x00, 0x00, 0x00, + //cbSubgroupOrder + 0x30, 0x00, 0x00, 0x00, + //cbCofactor + 0x01, 0x00, 0x00, 0x00, + //cbSeed + 0x00, 0x00, 0x00, 0x00, + //p + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xC3, + //A + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFE, 0xC2, + //d + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x15, 0x8A, + //x + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, + //y + 0x74, 0x9C, 0xDA, 0xBA, 0x13, 0x6C, 0xE9, 0xB6, + 0x5B, 0xD4, 0x47, 0x17, 0x94, 0xAA, 0x61, 0x9D, + 0xAA, 0x5C, 0x7B, 0x4C, 0x93, 0x0B, 0xFF, 0x8E, + 0xBD, 0x79, 0x8A, 0x8A, 0xE7, 0x53, 0xC6, 0xD7, + 0x2F, 0x00, 0x38, 0x60, 0xFE, 0xBA, 0xBA, 0xD5, + 0x34, 0xA4, 0xAC, 0xF5, 0xFA, 0x7F, 0x5B, 0xEE, + //q + 0x3F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xEC, 0xD7, 0xD1, 0x1E, 0xD5, 0xA2, 0x59, 0xA2, + 0x5A, 0x13, 0xA0, 0x45, 0x8E, 0x39, 0xF4, 0xE4, + 0x51, 0xD6, 0xD7, 0x1F, 0x70, 0x42, 0x6E, 0x25, + //h + 0x04 +}; + +static const BYTE rgbNumsP512t1[] = { + //dwVersion + 0x01, 0x00, 0x00, 0x00, + //dwCurveType + 0x02, 0x00, 0x00, 0x00, + //dwCurveGenerationAlgId + 0x00, 0x00, 0x00, 0x00, + //cbFieldLength + 0x40, 0x00, 0x00, 0x00, + //cbSubgroupOrder + 0x40, 0x00, 0x00, 0x00, + //cbCofactor + 0x01, 0x00, 0x00, 0x00, + //cbSeed + 0x00, 0x00, 0x00, 0x00, + //p + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFD, 0xC7, + //A + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFD, 0xC6, + //d + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0xBA, 0xA8, + //x + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, + //y + 0x7D, 0x67, 0xE8, 0x41, 0xDC, 0x4C, 0x46, 0x7B, + 0x60, 0x50, 0x91, 0xD8, 0x08, 0x69, 0x21, 0x2F, + 0x9C, 0xEB, 0x12, 0x4B, 0xF7, 0x26, 0x97, 0x3F, + 0x9F, 0xF0, 0x48, 0x77, 0x9E, 0x1D, 0x61, 0x4E, + 0x62, 0xAE, 0x2E, 0xCE, 0x50, 0x57, 0xB5, 0xDA, + 0xD9, 0x6B, 0x7A, 0x89, 0x7C, 0x1D, 0x72, 0x79, + 0x92, 0x61, 0x13, 0x46, 0x38, 0x75, 0x0F, 0x4F, + 0x0C, 0xB9, 0x10, 0x27, 0x54, 0x3B, 0x1C, 0x5E, + //q + 0x3F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xA7, 0xE5, 0x08, 0x09, 0xEF, 0xDA, 0xBB, 0xB9, + 0xA6, 0x24, 0x78, 0x4F, 0x44, 0x95, 0x45, 0xF0, + 0xDC, 0xEA, 0x5F, 0xF0, 0xCB, 0x80, 0x0F, 0x89, + 0x4E, 0x78, 0xD1, 0xCB, 0x0B, 0x5F, 0x01, 0x89, + //h + 0x04 +}; + + +/************************************ +* * +* MONTGOMERY CURVES * +* * +*************************************/ + +static const BYTE rgbCurve25519[] = { + //dwVersion + 0x02, 0x00, 0x00, 0x00, + //dwCurveType + 0x03, 0x00, 0x00, 0x00, + //dwCurveGenerationAlgId + 0x00, 0x00, 0x00, 0x00, + //cbFieldLength + 0x20, 0x00, 0x00, 0x00, + //cbSubgroupOrder + 0x20, 0x00, 0x00, 0x00, + //cbCofactor + 0x01, 0x00, 0x00, 0x00, + //cbSeed + 0x00, 0x00, 0x00, 0x00, + //p + 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xED, + //A + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x07, 0x6D, 0x06, + //B + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + //x + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, + //y + 0x20, 0xAE, 0x19, 0xA1, 0xB8, 0xA0, 0x86, 0xB4, + 0xE0, 0x1E, 0xDD, 0x2C, 0x77, 0x48, 0xD1, 0x4C, + 0x92, 0x3D, 0x4D, 0x7E, 0x6D, 0x7C, 0x61, 0xB2, + 0x29, 0xE9, 0xC5, 0xA2, 0x7E, 0xCE, 0xD3, 0xD9, + //q + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x14, 0xDE, 0xF9, 0xDE, 0xA2, 0xF7, 0x9C, 0xD6, + 0x58, 0x12, 0x63, 0x1A, 0x5C, 0xF5, 0xD3, 0xED, + //h + 0x08, + + // Version 2 parameters + // PrivateKeyDefaultFormat + 0x03, 0x00, 0x00, 0x00, + // HighBitRestrictionNumOfBits + 0x02, 0x00, 0x00, 0x00, + // HighBitRestrictionPosition + 0xFE, 0x00, 0x00, 0x00, + // HighBitRestrictionValue + 0x01, 0x00, 0x00, 0x00, +}; + +#endif // 1 + +// Version 2 parameter extension +static const SYMCRYPT_ECURVE_PARAMS_V2_EXTENSION paramsV2ExtensionShortWeierstrass = +{ + SYMCRYPT_ECKEY_PRIVATE_FORMAT_CANONICAL, + 0, + 0, + 0, +}; + +static const SYMCRYPT_ECURVE_PARAMS_V2_EXTENSION paramsV2ExtensionTwistedEdwards = +{ + SYMCRYPT_ECKEY_PRIVATE_FORMAT_DIVH, + 0, + 0, + 0, +}; + +static const SYMCRYPT_ECURVE_PARAMS_V2_EXTENSION paramsV2ExtensionMontgomery = +{ + SYMCRYPT_ECKEY_PRIVATE_FORMAT_DIVH_TIMESH, + 0, + 0, + 0, +}; + +// Definitions +const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNistP192 = (PCSYMCRYPT_ECURVE_PARAMS) rgbNistP192; +const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNistP224 = (PCSYMCRYPT_ECURVE_PARAMS) rgbNistP224; +const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNistP256 = (PCSYMCRYPT_ECURVE_PARAMS) rgbNistP256; +const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNistP384 = (PCSYMCRYPT_ECURVE_PARAMS) rgbNistP384; +const PCSYMCRYPT_ECURVE_PARAMS 
SymCryptEcurveParamsNistP521 = (PCSYMCRYPT_ECURVE_PARAMS) rgbNistP521; + +const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNumsP256t1 = (PCSYMCRYPT_ECURVE_PARAMS) rgbNumsP256t1; +const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNumsP384t1 = (PCSYMCRYPT_ECURVE_PARAMS) rgbNumsP384t1; +const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsNumsP512t1 = (PCSYMCRYPT_ECURVE_PARAMS) rgbNumsP512t1; + +const PCSYMCRYPT_ECURVE_PARAMS SymCryptEcurveParamsCurve25519 = (PCSYMCRYPT_ECURVE_PARAMS) rgbCurve25519; + +const PCSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION SymCryptEcurveParamsV2ExtensionShortWeierstrass = ¶msV2ExtensionShortWeierstrass; +const PCSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION SymCryptEcurveParamsV2ExtensionTwistedEdwards = ¶msV2ExtensionTwistedEdwards; +const PCSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION SymCryptEcurveParamsV2ExtensionMontgomery = ¶msV2ExtensionMontgomery; diff --git a/libs/symcrypt/lib/ec_internal_curves.c b/libs/symcrypt/lib/ec_internal_curves.c new file mode 100644 index 00000000000..d0107f431c7 --- /dev/null +++ b/libs/symcrypt/lib/ec_internal_curves.c @@ -0,0 +1,79 @@ +// +// ec_internal_curves.c Internally allocated elliptic curves. +// +// These curves are lazy-initialized. Currently only used +// for composite algorithms to avoid per-key allocation overhead. 
+// + +#include "precomp.h" + +static PCSYMCRYPT_ECURVE rgpCachedCurves[SYMCRYPT_CACHED_ECURVE_ID_COUNT] = { 0 }; + +static +PCSYMCRYPT_ECURVE_PARAMS +SYMCRYPT_CALL +SymCryptGetCachedEcurveParams( + SYMCRYPT_CACHED_ECURVE_ID curveId ) +{ + switch (curveId) + { + case SYMCRYPT_CACHED_ECURVE_ID_NIST_P256: + return SymCryptEcurveParamsNistP256; + case SYMCRYPT_CACHED_ECURVE_ID_NIST_P384: + return SymCryptEcurveParamsNistP384; + case SYMCRYPT_CACHED_ECURVE_ID_CURVE_25519: + return SymCryptEcurveParamsCurve25519; + default: + return NULL; + } +} + +PCSYMCRYPT_ECURVE +SYMCRYPT_CALL +SymCryptGetCachedEcurve( + SYMCRYPT_CACHED_ECURVE_ID curveId ) +{ + PCSYMCRYPT_ECURVE pCachedCurve = NULL; + PSYMCRYPT_ECURVE pNewCurve = NULL; + PSYMCRYPT_ECURVE pCurrCurve = NULL; + PCSYMCRYPT_ECURVE_PARAMS pParams = NULL; + + if ( curveId < 0 || curveId >= SYMCRYPT_CACHED_ECURVE_ID_COUNT ) + { + return NULL; + } + + pCachedCurve = (PCSYMCRYPT_ECURVE) SYMCRYPT_ATOMIC_LOADPTR_ACQUIRE( &rgpCachedCurves[curveId] ); + if ( pCachedCurve != NULL ) + { + return pCachedCurve; + } + + pParams = SymCryptGetCachedEcurveParams( curveId ); + if ( pParams == NULL ) + { + return NULL; + } + + pNewCurve = SymCryptEcurveAllocate( pParams, 0 ); + if ( pNewCurve == NULL ) + { + return NULL; + } + + pCurrCurve = SYMCRYPT_ATOMIC_CAS_PTR_ACQUIRE_RELEASE( + &rgpCachedCurves[curveId], + pNewCurve, + NULL); + + // Means the original curve was already filled + // and that our new curve was not used. So we + // free the new curve and return the existing one. + if ( pCurrCurve != NULL ) + { + SymCryptEcurveFree( pNewCurve ); + return pCurrCurve; + } + + return pNewCurve; +} diff --git a/libs/symcrypt/lib/ec_montgomery.c b/libs/symcrypt/lib/ec_montgomery.c new file mode 100644 index 00000000000..ced3fcdaafc --- /dev/null +++ b/libs/symcrypt/lib/ec_montgomery.c @@ -0,0 +1,443 @@ +// +// ec_montgomery.c Montgomery Implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
//

#include "precomp.h"

//
// Fill in the scratch-space size fields of a Montgomery curve:
// cbScratchCommon, cbScratchScalar, cbScratchScalarMulti, cbScratchGetSetValue
// and cbScratchEckey. The values are derived from the field modulus
// (FModBitsize, FMod, FModDigits) and cbModElement already stored in pCurve.
//
VOID
SYMCRYPT_CALL
SymCryptMontgomeryFillScratchSpaces(_In_ PSYMCRYPT_ECURVE pCurve)
{
    UINT32 nDigits = SymCryptDigitsFromBits( pCurve->FModBitsize );
    UINT32 nBytes = SymCryptSizeofModElementFromModulus( pCurve->FMod );
    // Largest of: one integer of nDigits, the common modular-operation scratch, and the ModInv scratch
    UINT32 nCommon = SYMCRYPT_MAX( SymCryptSizeofIntFromDigits( nDigits ), SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ), SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( nDigits ) ) );
    UINT32 cbModElement = pCurve->cbModElement;
    UINT32 nDigitsFieldLength = pCurve->FModDigits;

    //
    // All the scratch space computations are upper bounded by the SizeofXXX bound (2^19) and
    // the SCRATCH_BYTES_FOR_XXX bound (2^24) (see symcrypt_internal.h).
    //
    // One caveat is SymCryptSizeofEcpointFromCurve and SymCryptSizeofEcpointEx which calculate the
    // size of EcPoint with 4 coordinates (each one a modelement of max size 2^17). Thus upper
    // bounded by 2^20.
    //

    pCurve->cbScratchCommon = nCommon;

    // The 6 * nBytes term matches the six mod elements (X2, Z2, X3, Z3, Temp1, Temp2)
    // that SymCryptMontgomeryPointScalarMul carves out of its scratch buffer.
    pCurve->cbScratchScalar =
        SymCryptSizeofIntFromDigits(nDigits) +
        6 * nBytes +
        nCommon;

    pCurve->cbScratchScalarMulti = 0;
    pCurve->cbScratchGetSetValue =
        SymCryptSizeofEcpointEx( cbModElement, SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH ) +
        2 * cbModElement +
        SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigitsFieldLength ),
                      SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( nDigitsFieldLength ) );

    // Never smaller than a single integer of nDigits
    pCurve->cbScratchGetSetValue = SYMCRYPT_MAX( pCurve->cbScratchGetSetValue, SymCryptSizeofIntFromDigits( nDigits ) );

    pCurve->cbScratchEckey =
        SYMCRYPT_MAX( cbModElement + SymCryptSizeofIntFromDigits(SymCryptEcurveDigitsofScalarMultiplier(pCurve)),
                      SymCryptSizeofEcpointFromCurve( pCurve ) ) +
        SYMCRYPT_MAX( pCurve->cbScratchScalar, pCurve->cbScratchGetSetValue );
}

//
// Set poDst to the distinguished point of the curve by copying pCurve->G.
// The scratch parameters are accepted but not used.
//
VOID
SYMCRYPT_CALL
SymCryptMontgomerySetDistinguished(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch )
{
    SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_MONTGOMERY_TYPE(pCurve) );
    SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poDst->pCurve) );

    UNREFERENCED_PARAMETER( pbScratch );
    UNREFERENCED_PARAMETER( cbScratch );

    SymCryptEcpointCopy( pCurve, pCurve->G, poDst );
}

//
// Verify poSrc1(X1, Z1) = poSrc2(X2, Z2)
// To avoid ModInv for 1/Z, we do
//      X1 * Z2 = X2 * Z1
//
// This function currently ignores the flags parameter as there is no distinction between equal and
// negative equal case in Single Projective Coordinates used in Montgomery curves. We accept the flags
// to maintain the same API as for other curves.
//
// The return value is whatever SymCryptModElementIsEqual reports for the two products.
// The first 2 mod elements' worth of pbScratch hold the products; the remainder is
// handed to the modular multiplications.
//
UINT32
SYMCRYPT_CALL
SymCryptMontgomeryIsEqual(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch)
{
    PSYMCRYPT_MODELEMENT peTemp[2];
    PSYMCRYPT_MODELEMENT peSrc1X, peSrc1Z;
    PSYMCRYPT_MODELEMENT peSrc2X, peSrc2Z;
    PSYMCRYPT_MODULUS pmMod = pCurve->FMod;
    SIZE_T nBytes;

    SYMCRYPT_ASSERT( (flags & ~(SYMCRYPT_FLAG_ECPOINT_EQUAL|SYMCRYPT_FLAG_ECPOINT_NEG_EQUAL)) == 0 );
    SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_MONTGOMERY_TYPE(pCurve) );
    SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc1->pCurve) && SymCryptEcurveIsSame(pCurve, poSrc2->pCurve) );
    SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) );

    UNREFERENCED_PARAMETER( flags );

    nBytes = SymCryptSizeofModElementFromModulus( pmMod );

    SYMCRYPT_ASSERT( cbScratch >= 2 * nBytes );

    // Carve the two temporaries off the front of the scratch buffer
    for (UINT32 i = 0; i < 2; ++i)
    {
        peTemp[i] = SymCryptModElementCreate( pbScratch, nBytes, pmMod );
        pbScratch += nBytes;
        cbScratch -= nBytes;
    }

    // Coordinate 0 is X and coordinate 1 is Z
    peSrc1X = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc1 );
    peSrc1Z = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc1 );

    peSrc2X = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc2 );
    peSrc2Z = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc2 );

    // peTemp[0] = X1 * Z2
    SymCryptModMul( pmMod, peSrc1X, peSrc2Z, peTemp[0], pbScratch, cbScratch );

    // peTemp[1] = X2 * Z1
    SymCryptModMul( pmMod, peSrc2X, peSrc1Z, peTemp[1], pbScratch, cbScratch );

    return SymCryptModElementIsEqual( pmMod, peTemp[0], peTemp[1] );
}

//
// Report whether poSrc is the zero point, decided by testing its Z coordinate
// (coordinate 1) with SymCryptModElementIsZero. The scratch parameters are
// accepted but not used.
//
UINT32
SYMCRYPT_CALL
SymCryptMontgomeryIsZero(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch )
{
    PCSYMCRYPT_MODULUS FMod = pCurve->FMod;
    PSYMCRYPT_MODELEMENT peZ = NULL; // Pointer to Z

    SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_MONTGOMERY_TYPE(pCurve) );
    SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) );

    UNREFERENCED_PARAMETER( pbScratch );
    UNREFERENCED_PARAMETER( cbScratch );

    // Getting pointer to Z of the source point
    peZ = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc );

    return SymCryptModElementIsZero( FMod, peZ );
}

VOID
SymCryptMontgomeryDoubleAndAdd(
    _In_        PCSYMCRYPT_MODULUS      pmMod,
    _In_        PCSYMCRYPT_MODELEMENT   peX1,
    _In_opt_    PCSYMCRYPT_MODELEMENT   peZ1,
    _In_        PCSYMCRYPT_MODELEMENT   peA24,
    _Inout_     PSYMCRYPT_MODELEMENT    peX2,
    _Inout_     PSYMCRYPT_MODELEMENT    peZ2,
    _Inout_     PSYMCRYPT_MODELEMENT    peX3,
    _Inout_     PSYMCRYPT_MODELEMENT    peZ3,
    _Inout_     PSYMCRYPT_MODELEMENT    peTemp1,
    _Inout_     PSYMCRYPT_MODELEMENT    peTemp2,
    _Out_writes_bytes_( cbScratch )     PBYTE   pbScratch,
                                        SIZE_T  cbScratch)
/*
We use the notation of ladd-1987-m-3, this is a generic Montgomery ladder implementation.
This is similar to RFC7748 for TLS use of curve25519, however, unlike in the RFC, we support the case when Z1 != 1.

When it is statically known that Z1 == 1 the caller can set peZ1 to NULL to skip one redundant modular multiplication.
    Note that this will be revealed through timing, so peZ1 can only be set to NULL if it is not secret that Z1 == 1.
    Z1 == 1 is statically known for points which have just been imported into SymCrypt (and for the distinguished point of the
    curve), and this knowledge is tracked in an ecPoint's normalized flag.

The (X,Z) values represent an x-coordinate (X/Z) but it avoids the modular division.

The value a24 is such that 4*a24 = a+2 where a is one of the Montgomery curve parameters.
Thus, a24 = (a+2)/4. For curve25519, A = 486662, so a24 = 121666 (=0x01db42)

Algorithm (ladd-1987-m-3), with all operations expanded
    A = X2 + Z2
    AA = A^2
    B = X2 - Z2
    BB = B^2
    E = AA - BB
    C = X3 + Z3
    D = X3 - Z3
    DA = D * A
    CB = C * B
    X5 = (DA + CB)^2
        DApCB = DA + CB
        X5 = DApCB^2
        if peZ1 != NULL:
            X5 = Z1 * X5
    Z5 = X1 * (DA - CB)^2
        DAmCB = DA - CB
        DAmCB2 = DAmCB ^ 2
        Z5 = X1 * DAmCB2
    X4 = AA * BB
    Z4 = E * (BB + a24 * E)
        A24E = A24 * E
        BAE = BB + A24 * E
        Z4 = E * BAE

If we write a = (X2,Z2) and b = (X3,Z3), and a-b = (X1,Z1), then this algorithm computes
(2*a) and (a+b) into (X4, Z4) and (X5,Z5) respectively.
The Montgomery ladder uses this as follows:
- Store xP and (x+1)P
- To process a 0 bit in the scalar, apply the DoubleAndAdd to (xP,(x+1)P) to get (2xP, (2x+1)P)
- To process a 1 bit in the scalar, apply the DoubleAndAdd to ((x+1)P, xP) to get ((2x+2)P, (2x+1)P)
This updates the state to either (2xP, (2x+1)P) or to ((2x+1)P, (2x+2)P) and corresponds to updating
x to either 2x or 2x+1.

The starting value is (0,P), represented as ((1,0),(P_x,P_z))
The algorithm above, when applied to (1, 0, X, Z) produces:
    A = 1, AA = 1, B = 1, BB = 1, E = 0,
    C = X+Z, D = X-Z, DA = X-Z, CB = X+Z,
    X5 = 4(X^2)Z, Z5 = 4X(Z^2)
    X4 = 1, Z4 = 0
for an output of (1, 0, 4(X^2)Z, 4X(Z^2))
But (4(X^2)Z, 4X(Z^2)) is just another representation of (X,Z) as only the quotient of the two numbers is significant.
So even if an exponent starts with a bunch of 0 bits, the DoubleAndAdd-based function computes the right result in constant time.

The results are written in place: on return (X2,Z2) holds (X4,Z4) and (X3,Z3) holds (X5,Z5).
Temp1 and Temp2 are overwritten.
*/
{
    // Temp1 = A = X2 + Z2
    SymCryptModAdd( pmMod, peX2, peZ2, peTemp1, pbScratch, cbScratch );

    // Z2 = B = X2 - Z2
    SymCryptModSub( pmMod, peX2, peZ2, peZ2, pbScratch, cbScratch );

    // Temp2 = C = X3 + Z3
    SymCryptModAdd( pmMod, peX3, peZ3, peTemp2, pbScratch, cbScratch );

    // Z3 = D = X3 - Z3
    SymCryptModSub( pmMod, peX3, peZ3, peZ3, pbScratch, cbScratch );

    // X3 = CB = C * B = Temp2 * Z2
    SymCryptModMul( pmMod, peTemp2, peZ2, peX3, pbScratch, cbScratch );

    // Z3 = DA = D * A = Z3 * Temp1
    SymCryptModMul( pmMod, peZ3, peTemp1, peZ3, pbScratch, cbScratch );

    // From this point on, the outputs (X5,Z5) depend only on (X3,Z3) and (X1,Z1)
    // and the outputs (X4,Z4) only on (Temp1,Z2) and A24
    // We'll do the (X4,Z4) first

    // X2 = AA = A * A = Temp1 * Temp1
    SymCryptModSquare( pmMod, peTemp1, peX2, pbScratch, cbScratch );

    // Temp1 = BB = B * B = Z2 * Z2
    SymCryptModSquare( pmMod, peZ2, peTemp1, pbScratch, cbScratch );

    // Temp2 = E = AA - BB = X2 - Temp1
    SymCryptModSub( pmMod, peX2, peTemp1, peTemp2, pbScratch, cbScratch );

    // X2 = X4 = AA * BB = X2 * Temp1
    SymCryptModMul( pmMod, peX2, peTemp1, peX2, pbScratch, cbScratch );

    // Z2 = A24E = A24 * E = A24 * Temp2
    SymCryptModMul( pmMod, peA24, peTemp2, peZ2, pbScratch, cbScratch );

    // Z2 = BAE = (BB + a24 * E) = BB + A24E = Temp1 + Z2
    SymCryptModAdd( pmMod, peTemp1, peZ2, peZ2, pbScratch, cbScratch );

    // Z2 = Z4 = E * BAE = Temp2 * Z2
    SymCryptModMul( pmMod, peTemp2, peZ2, peZ2, pbScratch, cbScratch );

    // Now we compute (X5, Z5)

    // Temp1 = DApCB = DA + CB = Z3 + X3
    SymCryptModAdd( pmMod, peZ3, peX3, peTemp1, pbScratch, cbScratch );

    // Z3 = DAmCB = DA - CB = Z3 - X3
    SymCryptModSub( pmMod, peZ3, peX3, peZ3, pbScratch, cbScratch );

    // X3 = DApCB^2 = Temp1 ^ 2 ( = X5 when (peZ1 == NULL) => Z1 == 1)
    SymCryptModSquare( pmMod, peTemp1, peX3, pbScratch, cbScratch );

    if (peZ1 != NULL) // source point is not normalized
    {
        // X3 = X5 = Z1 * DApCB^2 = Z1 * X3
        SymCryptModMul( pmMod, peZ1, peX3, peX3, pbScratch, cbScratch );
    }

    // Z3 = DAmCB2 = DAmCB ^ 2 = Z3 ^ 2
    SymCryptModSquare( pmMod, peZ3, peZ3, pbScratch, cbScratch );

    // Z3 = Z5 = X1 * DAmCB2 = X1 * Z3
    SymCryptModMul( pmMod, peX1, peZ3, peZ3, pbScratch, cbScratch );
}

//
// Montgomery point multiplication only works on X-coordinates.
// We ignore the Y-coordinates.
//
// Computes piScalar * poSrc into poDst with a Montgomery ladder built on
// SymCryptMontgomeryDoubleAndAdd. When poSrc is NULL the curve's distinguished
// point pCurve->G is used. The only flag accepted is SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL,
// which adds coFactorPower further doublings; any other flag bit returns
// SYMCRYPT_INVALID_ARGUMENT. On success only the X and Z coordinates of poDst
// are written and poDst->normalized is set to FALSE.
//
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptMontgomeryPointScalarMul(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_INT      piScalar,
    _In_opt_
            PCSYMCRYPT_ECPOINT  poSrc,
            UINT32              flags,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch)
{
    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;

    PSYMCRYPT_MODULUS pmMod;
    PSYMCRYPT_MODELEMENT peX1, peZ1, peA24, peX2, peZ2, peX3, peZ3, peTemp1, peTemp2, peResult;
    UINT32 i, nBytes, nDigits, cond, newcond, nCommon;
    PBYTE pBegin;
    SIZE_T cbAllScratch;

    SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_MONTGOMERY_TYPE(pCurve) );
    SYMCRYPT_ASSERT( (poSrc == NULL || SymCryptEcurveIsSame(pCurve, poSrc->pCurve)) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) );

    // Make sure we only specify the correct flags
    if ((flags & ~SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL) != 0)
    {
        scError = SYMCRYPT_INVALID_ARGUMENT;
        goto cleanup;
    }

    if (poSrc == NULL)
    {
        poSrc = pCurve->G;
    }

    //
    // Set up structure for X2, Z2, X3, Z3, Temp1, and Temp2, and the scratch space.
    //
    pmMod = pCurve->FMod;

    nDigits = SymCryptDigitsFromBits( pCurve->FModBitsize );
    nBytes = SymCryptSizeofModElementFromModulus( pmMod );
    nCommon = SYMCRYPT_MAX( SymCryptSizeofIntFromDigits(nDigits), SYMCRYPT_MAX(SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS(nDigits), SYMCRYPT_SCRATCH_BYTES_FOR_MODINV(nDigits)));

    SYMCRYPT_ASSERT( cbScratch >= 6 * nBytes + nCommon );

    // NOTE(review): cbAllScratch and pBegin are assigned here but not read again in this function
    cbAllScratch = cbScratch;
    pBegin = pbScratch;

    //
    // Create mod elements
    //
    peX2 = SymCryptModElementCreate( pbScratch, nBytes, pmMod );
    pbScratch += nBytes;

    peZ2 = SymCryptModElementCreate( pbScratch, nBytes, pmMod );
    pbScratch += nBytes;

    peX3 = SymCryptModElementCreate( pbScratch, nBytes, pmMod );
    pbScratch += nBytes;

    peZ3 = SymCryptModElementCreate( pbScratch, nBytes, pmMod );
    pbScratch += nBytes;

    peTemp1 = SymCryptModElementCreate( pbScratch, nBytes, pmMod );
    pbScratch += nBytes;

    peTemp2 = SymCryptModElementCreate( pbScratch, nBytes, pmMod );
    pbScratch += nBytes;

    // What follows the six elements is the nCommon bytes used by the modular operations
    cbScratch = nCommon;

    //
    // Set up values
    //

    // The curve's A field is passed to DoubleAndAdd as its a24 argument
    peA24 = pCurve->A;

    // X1 = X, Z1 = Z
    peX1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc);
    peZ1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc);

    // X2 = 1, Z2 = 0, X3 = X, Z3 = Z
    SymCryptModElementSetValueUint32( 1, pmMod, peX2, pbScratch, cbScratch );
    SymCryptModElementSetValueUint32( 0, pmMod, peZ2, pbScratch, cbScratch );
    SymCryptModElementCopy( pmMod, peX1, peX3 );
    SymCryptModElementCopy( pmMod, peZ1, peZ3 );

    if ( poSrc->normalized )
    {
        // Set peZ1 to NULL to avoid redundant multiplications in SymCryptMontgomeryDoubleAndAdd
        peZ1 = NULL;
    }

    //
    // Montgomery ladder scalar multiplication
    //

    // Scalar bits are consumed from bit (GOrdBitsize + coFactorPower - 1) down to bit 0
    i = (pCurve->GOrdBitsize + pCurve->coFactorPower);
    cond = 0;
    while ( i != 0 )
    {
        // If cond = 0, we have (X2, Z2, X3, Z3)
        // if cond = 1, we have (X3, Z3, X2, Z2)
        i--;
        newcond = SymCryptIntGetBit( piScalar, i );
        // Swap only when this bit differs from the previous one
        cond ^= newcond;

        SymCryptModElementConditionalSwap( pmMod, peX2, peX3, cond);
        SymCryptModElementConditionalSwap( pmMod, peZ2, peZ3, cond);

        cond = newcond;

        SymCryptMontgomeryDoubleAndAdd( pmMod, peX1, peZ1, peA24, peX2, peZ2, peX3, peZ3, peTemp1, peTemp2, pbScratch, cbScratch );
    }

    // Now put them back in the normal order
    SymCryptModElementConditionalSwap( pmMod, peX2, peX3, cond);
    SymCryptModElementConditionalSwap( pmMod, peZ2, peZ3, cond);

    // Multiply by the cofactor (if needed) by continuing the doubling
    if ((flags & SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL) != 0)
    {
        i = pCurve->coFactorPower;
        while (i!=0)
        {
            i--;
            // We only use the doubling output here, so we definitely don't need to provide Z1
            // We could refactor to have a separate SymCryptMontgomeryDouble function but for Curve25519 this loop is ~1% of runtime
            SymCryptMontgomeryDoubleAndAdd( pmMod, peX1, NULL, peA24, peX2, peZ2, peX3, peZ3, peTemp1, peTemp2, pbScratch, cbScratch );
        }
    }

    // Set X coordinate
    peResult = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst);
    SymCryptModElementCopy( pCurve->FMod, peX2, peResult );

    // Set Z coordinate
    peResult = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst);
    SymCryptModElementCopy( pCurve->FMod, peZ2, peResult );

    poDst->normalized = FALSE;

    scError = SYMCRYPT_NO_ERROR;

cleanup:
    return scError;
}
diff --git a/libs/symcrypt/lib/ec_mul.c b/libs/symcrypt/lib/ec_mul.c
new file mode 100644
index 00000000000..615edffa201
--- /dev/null
+++ b/libs/symcrypt/lib/ec_mul.c
@@ -0,0 +1,571 @@
//
// ec_mul.c   Generic multiplication algorithms for elliptic curves
//
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+// +// + +#include "precomp.h" + +// +// Most of the following algorithms were presented in the paper +// "Selecting Elliptic Curves for Cryptography: An Efficiency and +// Security Analysis" by Bos, Costello, Longa, and Naehrig +// + +// +// The following is an adaptation of algorithm 4: "Precomputation +// scheme for Weierstrass curves" +// +// Input: Point P and number of precomputed points nPoints (=2^(w-2)) +// +// Output: P[i] = (2*i+1)P for 0<=i<2^(w-2) +// +// Remarks: +// 1. We store each point in an array of 4*2^(w-2) = 2^w modelements where +// each point is represented with X,Y,Z Jacobian coordinates and the W=-Y +// negated Y coordinate (so that we can get the negative of a point easily) +// 2. The source point P is already in the 0'th position of the array. +// +VOID +SYMCRYPT_CALL +SymCryptPrecomputation( + _In_ PCSYMCRYPT_ECURVE pCurve, + UINT32 nPoints, + _In_reads_( SYMCRYPT_ECURVE_SW_MAX_NPRECOMP_POINTS ) + PSYMCRYPT_ECPOINT * poPIs, + _Out_ PSYMCRYPT_ECPOINT poQ, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poQ->pCurve) ); + // Calculation for Q = 2*P + SymCryptEcpointDouble( pCurve, poPIs[0], poQ, 0, pbScratch, cbScratch ); + + for (UINT32 i=1; i<nPoints; i++) + { + // Calculation for (2i+1)*P = i*Q + P + SymCryptEcpointAddDiffNonZero( pCurve, poQ, poPIs[i-1], poPIs[i], pbScratch, cbScratch ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptOfflinePrecomputation( + _In_ PSYMCRYPT_ECURVE pCurve, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PSYMCRYPT_ECPOINT poQ = NULL; + + UINT32 cbEcpoint = SymCryptSizeofEcpointFromCurve( pCurve ); + + SYMCRYPT_ASSERT( cbScratch >= cbEcpoint + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) ); + + poQ = SymCryptEcpointCreate( pbScratch, cbEcpoint, pCurve ); + SYMCRYPT_ASSERT( poQ != NULL ); + pbScratch += cbEcpoint; + cbScratch -= cbEcpoint; + + SymCryptPrecomputation( + pCurve, + 
pCurve->info.sw.nPrecompPoints, + pCurve->info.sw.poPrecompPoints, + poQ, + pbScratch, + cbScratch ); +} + +// Mask which is 0xffffffff only when _index == _target +#define DELTA_MASK( _index, _target) SYMCRYPT_MASK32_ZERO( (_index) ^ (_target) ) + +// +// The following is an adaptation of algorithm 1: "Variable-base scalar multiplication +// using the fixed-window method" +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointScalarMulFixedWindow( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_INT piScalar, + _In_opt_ + PCSYMCRYPT_ECPOINT poSrc, + UINT32 flags, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + + PCSYMCRYPT_MODULUS FMod = pCurve->FMod; + + UINT32 i, j; + + UINT32 w = pCurve->info.sw.window; + UINT32 nPrecompPoints = pCurve->info.sw.nPrecompPoints; + // dcl - assuming that nRecodedDigits has some reasonably small range - please document + // so that we can know usage of this variable will not cause problems + // Also, documentation of inputs, notes, etc at the function definition would be quite helpful + UINT32 nRecodedDigits = ((pCurve->GOrdBitsize + w - 2) / (w-1)) + 1; + + // Masks + UINT32 fZero = 0; + UINT32 fEven = 0; + UINT32 indexMask = 0; + + BOOLEAN bPrecompOffline = FALSE; + + // ==================================================== + // Temporaries + PSYMCRYPT_MODELEMENT peT = NULL; + PSYMCRYPT_ECPOINT poPIs[SYMCRYPT_ECURVE_SW_MAX_NPRECOMP_POINTS] = { 0 }; + PSYMCRYPT_ECPOINT poQ = NULL; + PSYMCRYPT_ECPOINT poTmp = NULL; + PSYMCRYPT_INT piRem = NULL; + PSYMCRYPT_INT piTmp = NULL; + PUINT32 absofKIs = NULL; + PUINT32 sigofKIs = NULL; + // =================================================== + + PSYMCRYPT_MODELEMENT peQX = NULL; + PSYMCRYPT_MODELEMENT peQY = NULL; + PSYMCRYPT_MODELEMENT peQZ = NULL; + + SIZE_T cbEcpoint = SymCryptSizeofEcpointFromCurve( pCurve ); + SIZE_T cbScalar = SymCryptSizeofIntFromDigits( 
pCurve->GOrdDigits ); + + // Make sure we only specify the correct flags + if ((flags & ~SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL) != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto exit; + } + + // Check if poSrc is NULL and if yes set it to G + if (poSrc == NULL) + { + poSrc = pCurve->G; + bPrecompOffline = TRUE; + } + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) || + SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS(pCurve, 1) ); + + SYMCRYPT_ASSERT( cbScratch >= + pCurve->cbModElement + + (nPrecompPoints+2)*cbEcpoint + + 2*cbScalar + + ((2*nRecodedDigits*sizeof(UINT32) + SYMCRYPT_ASYM_ALIGN_VALUE - 1)/SYMCRYPT_ASYM_ALIGN_VALUE )*SYMCRYPT_ASYM_ALIGN_VALUE + + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ); + + // Creating temporary modelement + peT = SymCryptModElementCreate( pbScratch, pCurve->cbModElement, FMod ); + SYMCRYPT_ASSERT( peT != NULL ); + pbScratch += pCurve->cbModElement; + + // Creating temporary precomputed points (if needed) + SYMCRYPT_ASSERT( nPrecompPoints <= SYMCRYPT_ECURVE_SW_MAX_NPRECOMP_POINTS ); + for (i=0; i<nPrecompPoints; i++) + { + if (bPrecompOffline) + { + poPIs[i] = pCurve->info.sw.poPrecompPoints[i]; + } + else + { + poPIs[i] = SymCryptEcpointCreate( pbScratch, cbEcpoint, pCurve ); + SYMCRYPT_ASSERT( poPIs[i] != NULL ); + pbScratch += cbEcpoint; + } + } + + // Creating temporary points + poQ = SymCryptEcpointCreate( pbScratch, cbEcpoint, pCurve ); + SYMCRYPT_ASSERT( poQ != NULL ); + pbScratch += cbEcpoint; + + poTmp = SymCryptEcpointCreate( pbScratch, cbEcpoint, pCurve ); + SYMCRYPT_ASSERT( poTmp != NULL ); + pbScratch += cbEcpoint; + + // Creating temporary scalar for the remainder + piRem = SymCryptIntCreate( pbScratch, cbScalar, pCurve->GOrdDigits ); + SYMCRYPT_ASSERT( piRem != NULL); + 
pbScratch += cbScalar; + + piTmp = SymCryptIntCreate( pbScratch, cbScalar, pCurve->GOrdDigits ); + SYMCRYPT_ASSERT( piTmp != NULL); + pbScratch += cbScalar; + + // Fixing pointers to recoded digits (be careful that the remaining space is SYMCRYPT_ASYM_ALIGNed) + absofKIs = (PUINT32) pbScratch; + pbScratch += nRecodedDigits * sizeof(UINT32); + sigofKIs = (PUINT32) pbScratch; + pbScratch += nRecodedDigits * sizeof(UINT32); + pbScratch = (PBYTE) ( ((SIZE_T)pbScratch + SYMCRYPT_ASYM_ALIGN_VALUE - 1) & ~(SYMCRYPT_ASYM_ALIGN_VALUE - 1) ); + + // Fixing remaining scratch space size + cbScratch -= ( pCurve->cbModElement + (nPrecompPoints+2)*cbEcpoint + 2*cbScalar ); + cbScratch -= (((2*nRecodedDigits*sizeof(UINT32) + SYMCRYPT_ASYM_ALIGN_VALUE - 1)/SYMCRYPT_ASYM_ALIGN_VALUE )*SYMCRYPT_ASYM_ALIGN_VALUE); + + // + // Main algorithm + // + + // It is the caller's responsibility to ensure that the provided piScalar <= GOrd, double check this in debug mode + SYMCRYPT_ASSERT( !SymCryptIntIsLessThan( SymCryptIntFromModulus( pCurve->GOrd ), piScalar ) ); + + // Store k into an int + SymCryptIntCopy( piScalar, piRem ); + + // Check if k is 0 + fZero = SymCryptIntIsEqualUint32( piRem, 0 ); + + // Or if the src point is zero + fZero |= SymCryptEcpointIsZero( pCurve, poSrc, pbScratch, cbScratch ); + + // Check if k is even and convert it to r-k if true + fEven = SYMCRYPT_MASK32_ZERO(SymCryptIntGetBit( piRem, 0 )); + SymCryptIntSubSameSize( SymCryptIntFromModulus(pCurve->GOrd), piRem, piTmp); + SymCryptIntMaskedCopy( piTmp, piRem, fEven ); + + // Recoding stage + SymCryptFixedWindowRecoding( w, piRem, piTmp, absofKIs, sigofKIs, nRecodedDigits ); + + // Precomputation stage + if (!bPrecompOffline) + { + // Copy the first point in the start of the poPIs array + SymCryptEcpointCopy( pCurve, poSrc, poPIs[0] ); + + SymCryptPrecomputation( pCurve, nPrecompPoints, poPIs, poQ, pbScratch, cbScratch ); + } + + + // Get the pointers to Q + peQX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, 
poQ ); + peQY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poQ ); + peQZ = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poQ ); + + // Q = P[ (|k_t|-1)/2 ] in memory access side-channel safe way + // That is, we touch all the precomputed points. The access pattern of KIs is fixed. + for (j=0; j<nPrecompPoints; j++) + { + indexMask = DELTA_MASK( j, absofKIs[nRecodedDigits-1] ); + SymCryptEcpointMaskedCopy( pCurve, poPIs[j], poQ, indexMask); + } + + for (i=nRecodedDigits - 2; i>0; i--) + { + // Q = 2^(w-1) * Q + for (j=0; j<w-1; j++) + { + SymCryptEcpointDouble( pCurve, poQ, poQ, 0, pbScratch, cbScratch ); + } + + // Copy the required precomputed point into poTmp (touch all points) + for (j=0; j<nPrecompPoints; j++) + { + indexMask = DELTA_MASK( j, absofKIs[i] ); + SymCryptEcpointMaskedCopy( pCurve, poPIs[j], poTmp, indexMask); + } + + // Negate if needed + SymCryptEcpointNegate( pCurve, poTmp, sigofKIs[i], pbScratch, cbScratch ); + + // Do the addition Q + s_i P[k_i] + SymCryptEcpointAddDiffNonZero( pCurve, poQ, poTmp, poQ, pbScratch, cbScratch ); + } + + // Q = 2^(w-1) * Q + for (j=0; j<w-1; j++) + { + SymCryptEcpointDouble( pCurve, poQ, poQ, 0, pbScratch, cbScratch ); + } + + // Copy the point s_0 P[k_0] into poTmp + for (j=0; j<nPrecompPoints; j++) + { + indexMask = DELTA_MASK( j, absofKIs[0] ); + SymCryptEcpointMaskedCopy( pCurve, poPIs[j], poTmp, indexMask); + } + + // Negate if needed + SymCryptEcpointNegate( pCurve, poTmp, sigofKIs[0], pbScratch, cbScratch ); + + // Complete addition routine + SymCryptEcpointAdd( pCurve, poQ, poTmp, poQ, 0, pbScratch, cbScratch ); + + // If even invert + SymCryptEcpointNegate( pCurve, poQ, fEven, pbScratch, cbScratch ); + + // Multiply by the cofactor (if needed) by continuing the doubling + if ((pCurve->coFactorPower!=0) && ((flags & SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL) != 0)) + { + for (j=0; j<pCurve->coFactorPower; j++) + { + SymCryptEcpointDouble( pCurve, poQ, poQ, 0, pbScratch, cbScratch ); + } + } + + // If the 
resultant point is zero, ensure it will be set to the canonical zero point + fZero |= SymCryptEcpointIsZero( pCurve, poQ, pbScratch, cbScratch ); + + // Set the zero point + SymCryptEcpointSetZero( pCurve, poTmp, pbScratch, cbScratch ); + SymCryptEcpointMaskedCopy( pCurve, poTmp, poQ, fZero ); + + // Output the result (normalized flag == FALSE) + SymCryptEcpointCopy( pCurve, poQ, poDst ); + + scError = SYMCRYPT_NO_ERROR; + +exit: + + return scError; +} + +// +// The following is an adaptation of algorithm 9: "Double-scalar multiplication using the +// width-w NAF with interleaving" +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointMultiScalarMulWnafWithInterleaving( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_reads_( nPoints ) PCSYMCRYPT_INT * piSrcScalarArray, + _In_reads_( nPoints ) PCSYMCRYPT_ECPOINT * poSrcEcpointArray, + _In_ UINT32 nPoints, + _In_ UINT32 flags, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + + UINT32 i, j; + + UINT32 w = pCurve->info.sw.window; + UINT32 nPrecompPoints = pCurve->info.sw.nPrecompPoints; // One table for each base + UINT32 nRecodedDigits = pCurve->GOrdBitsize + 1; // Notice the difference with the fixed window + + // Masks + UINT32 fZero[SYMCRYPT_ECURVE_MULTI_SCALAR_MUL_MAX_NPOINTS] = { 0 }; + UINT32 fZeroTot = 0xffffffff; + + BOOLEAN bPrecompOffline = FALSE; + + // ==================================================== + // Temporaries + PSYMCRYPT_ECPOINT poPIs[SYMCRYPT_ECURVE_SW_MAX_NPRECOMP_POINTS] = { 0 }; + PSYMCRYPT_ECPOINT poQ = NULL; + PSYMCRYPT_ECPOINT poTmp = NULL; + PSYMCRYPT_INT piRem = NULL; + PSYMCRYPT_INT piTmp = NULL; + + PUINT32 absofKIs = NULL; + PUINT32 sigofKIs = NULL; + // =================================================== + + SIZE_T cbEcpoint = SymCryptSizeofEcpointFromCurve( pCurve ); + SIZE_T cbScalar = SymCryptSizeofIntFromDigits( pCurve->GOrdDigits ); + + PBYTE pbScratchEnd = pbScratch 
+ cbScratch; + UNREFERENCED_PARAMETER( pbScratchEnd ); // Used in asserts + + // Make sure we only specify the correct flags + if ((flags & ~(SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL)) != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto exit; + } + + // Check the maximum number of points + if (nPoints > SYMCRYPT_ECURVE_MULTI_SCALAR_MUL_MAX_NPOINTS) + { + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto exit; + } + + // Check if the first point is NULL + if (poSrcEcpointArray[0] == NULL) + { + poSrcEcpointArray[0] = pCurve->G; + bPrecompOffline = TRUE; + } + + // Make sure that the non side-channel flag is specified + if ((flags & SYMCRYPT_FLAG_DATA_PUBLIC) == 0 ) + { + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto exit; + } + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) || + SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS(pCurve, nPoints) ); + + // Creating temporary precomputed points (if needed for the first point) + for (i=0; i<nPoints*nPrecompPoints; i++) + { + if ((i<nPrecompPoints) && bPrecompOffline) + { + poPIs[i] = pCurve->info.sw.poPrecompPoints[i]; + } + else + { + SYMCRYPT_ASSERT( pbScratch + cbEcpoint <= pbScratchEnd ); + poPIs[i] = SymCryptEcpointCreate( pbScratch, cbEcpoint, pCurve ); + SYMCRYPT_ASSERT( poPIs[i] != NULL ); + pbScratch += cbEcpoint; + } + } + + SYMCRYPT_ASSERT( pbScratch + 2*cbEcpoint + 2*cbScalar + 2*nPoints*nRecodedDigits*sizeof(UINT32) <= pbScratchEnd ); + // Creating temporary points + poQ = SymCryptEcpointCreate( pbScratch, cbEcpoint, pCurve ); + SYMCRYPT_ASSERT( poQ != NULL ); + pbScratch += cbEcpoint; + + poTmp = SymCryptEcpointCreate( pbScratch, cbEcpoint, pCurve ); + SYMCRYPT_ASSERT( poTmp != NULL ); + pbScratch += cbEcpoint; + + // Creating temporary scalar for the remainder + piRem = SymCryptIntCreate( pbScratch, cbScalar, 
pCurve->GOrdDigits ); + SYMCRYPT_ASSERT( piRem != NULL); + pbScratch += cbScalar; + + piTmp = SymCryptIntCreate( pbScratch, cbScalar, pCurve->GOrdDigits ); + SYMCRYPT_ASSERT( piTmp != NULL); + pbScratch += cbScalar; + + // Fixing pointers to recoded digits (be careful that the remaining space is SYMCRYPT_ASYM_ALIGNed) + absofKIs = (PUINT32) pbScratch; + pbScratch += nPoints * nRecodedDigits * sizeof(UINT32); + sigofKIs = (PUINT32) pbScratch; + pbScratch += nPoints * nRecodedDigits * sizeof(UINT32); + pbScratch = (PBYTE) ( ((SIZE_T)pbScratch + SYMCRYPT_ASYM_ALIGN_VALUE - 1) & ~(SYMCRYPT_ASYM_ALIGN_VALUE - 1) ); + + // Fixing remaining scratch space size + // dcl - my guess is that the values here are small enough that there should not be a problem, but + // would be better if that were documented. + cbScratch -= ( (nPoints*nPrecompPoints+2)*cbEcpoint + 2*cbScalar ); + cbScratch -= (((2*nPoints*nRecodedDigits*sizeof(UINT32) + SYMCRYPT_ASYM_ALIGN_VALUE - 1)/SYMCRYPT_ASYM_ALIGN_VALUE )*SYMCRYPT_ASYM_ALIGN_VALUE); + + // + // Main algorithm + // + for (j = 0; j<nPoints; j++) + { + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrcEcpointArray[j]->pCurve) ); + + // Check if k is 0 or if the src point is zero + fZero[j] = ( SymCryptIntIsEqualUint32( piSrcScalarArray[j], 0 ) | SymCryptEcpointIsZero( pCurve, poSrcEcpointArray[j], pbScratch, cbScratch ) ); + fZeroTot &= fZero[j]; + + // Skip the recoding stage (and all remaining steps) if this point will give result zero + if (!fZero[j]) + { + SymCryptIntCopy( piSrcScalarArray[j], piRem ); + + // Recoding stage + SymCryptWidthNafRecoding( w, piRem, &absofKIs[j*nRecodedDigits], &sigofKIs[j*nRecodedDigits], nRecodedDigits ); + + // Precomputation stage + if ((j>0) || !bPrecompOffline) + { + // Copy the first point in the start of the poPIs array + SymCryptEcpointCopy( pCurve, poSrcEcpointArray[j], poPIs[j*nPrecompPoints] ); + + SymCryptPrecomputation( pCurve, nPrecompPoints, &poPIs[j*nPrecompPoints], poQ, pbScratch, cbScratch 
); + } + } + } + + // Set poQ to zero point + SymCryptEcpointSetZero( pCurve, poQ, pbScratch, cbScratch ); + + if (!fZeroTot) + { + // Main loop + for (INT32 i = nRecodedDigits-1; i>-1; i--) + { + SymCryptEcpointDouble( pCurve, poQ, poQ, 0, pbScratch, cbScratch ); + + for (j = 0; j<nPoints; j++) + { + if (!fZero[j] && sigofKIs[j*nRecodedDigits + i] != 0) + { + SymCryptEcpointCopy( pCurve, poPIs[j*nPrecompPoints + absofKIs[j*nRecodedDigits + i]/2], poTmp ); + + if (sigofKIs[j*nRecodedDigits + i] == 0xffffffff) + { + SymCryptEcpointNegate( pCurve, poTmp, 0xffffffff, pbScratch, cbScratch ); + } + + SymCryptEcpointAdd( pCurve, poQ, poTmp, poQ, SYMCRYPT_FLAG_DATA_PUBLIC, pbScratch, cbScratch ); + } + } + } + } + + // Multiply by the cofactor (if needed) by continuing the doubling + if ((pCurve->coFactorPower!=0) && ((flags & SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL) != 0)) + { + for (j=0; j<pCurve->coFactorPower; j++) + { + SymCryptEcpointDouble( pCurve, poQ, poQ, 0, pbScratch, cbScratch ); + } + } + + // If the resultant point is zero, ensure it will be set to the canonical zero point + if ( SymCryptEcpointIsZero( pCurve, poQ, pbScratch, cbScratch ) ) + { + // Set poQ to zero point + SymCryptEcpointSetZero( pCurve, poQ, pbScratch, cbScratch ); + } + + // Copy the result to the destination (normalized flag == FALSE) + SymCryptEcpointCopy( pCurve, poQ, poDst ); + + scError = SYMCRYPT_NO_ERROR; + +exit: + return scError; +} + +VOID +SYMCRYPT_CALL +SymCryptEcpointGenericSetRandom( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_INT piScalar, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PSYMCRYPT_MODELEMENT peScalar = NULL; + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_SCALAR_ECURVE_OPERATIONS(pCurve, 1) ); + SYMCRYPT_ASSERT( cbScratch >= pCurve->cbModElement ); + + peScalar = SymCryptModElementCreate( pbScratch, 
pCurve->cbModElement, pCurve->GOrd ); + SYMCRYPT_ASSERT( peScalar != NULL ); + + // Setting a random mod element in the [1, SubgroupOrder-1] set + SymCryptModSetRandom( pCurve->GOrd, peScalar, (SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE|SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE), pbScratch + pCurve->cbModElement, cbScratch - pCurve->cbModElement ); + + // Setting the integer + SymCryptModElementToInt( pCurve->GOrd, peScalar, piScalar, pbScratch + pCurve->cbModElement, cbScratch - pCurve->cbModElement ); + + // Do the multiplication (pass over the entire scratch space as it is not needed anymore) + // !! Explicitly not checking the error return here as the only error is from specifying invalid flags !! + SymCryptEcpointScalarMul( pCurve, piScalar, NULL, 0, poDst, pbScratch, cbScratch ); +} diff --git a/libs/symcrypt/lib/ec_short_weierstrass.c b/libs/symcrypt/lib/ec_short_weierstrass.c new file mode 100644 index 00000000000..ca8399a5fd7 --- /dev/null +++ b/libs/symcrypt/lib/ec_short_weierstrass.c @@ -0,0 +1,935 @@ +// +// ec_short_weierstrass.c ECPOINT functions for short Weierstrass curves. +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// + +#include "precomp.h" + +// +// Scratch space requirements for each ECPOINT function. +// +// A function's requirements in scratch space consist of requirements for its own arithmetic +// operations and temporaries ("self" scratch space) and scratch space requirements for other +// ECPOINT functions it might call ("callee" scratch space). +// +// If the outer function does not need the temporaries after calling the inner ECPOINT +// function, then the total scratch space can be the maximum of both. Otherwise the scratch +// space of the outer function should be the concatenation of the "self" scratch space and +// the "callee" scratch space. +// +// The following table shows the scratch space requirements of each function with appropriate +// abbreviations. 
The calling sequence implies a directed graph that, starting from the "leaves" +// (functions that do not call others), allows calculating the total scratch space requirements. +// +// #N Function Calls Function Self Temporaries Self Scratch +// 1 SetZero - 0 COM_MOD(FMod) +// 2 SetDistinguishedPoint - 0 0 +// 3 IsEqual - 4 ModEl COM_MOD(FMod) +// 4 IsZero - 1 ModEl COM_MOD(FMod) +// 5 OnCurve - 2 ModEl COM_MOD(FMod) +// 6 Double 1,4,9 2 Ecp 0 +// 7 Add 1,3,4,8,9 2 Ecp 0 +// 8 AddDiffNonZero - 8 ModEl COM_MOD(FMod) +// 9 Double - 6 ModEl COM_MOD(FMod) +// +// 10 SetRandom 11 0 COM_MOD(GOrd) +// 11 ScalarMul 4,5,7 1ModEl + (n+2)Ecp + 2Int COM_MOD(GOrd) +// +// Since only 4 functions call others and to keep things simple, we will have 2 +// types of scratch space: "ECURVE_COMMON" and "ECURVE_SCALAR" +// +// ---- All functions except 10 and 11 will use the "ECURVE_COMMON" scratch space. The size of it +// depends only on parameters of the curve. Schematically it will be: +// |----------COMMON------------------------------------------------------------------| +// |------8 ModEl + 2 Ecpoint----||------COM_MOD(FMod)--------------------------------| +// +// ---- The SetRandom and ScalarMul have requirements that depend on temporaries for the pre-computation. +// Also they depend on the "self" temporaries after calling the inner functions. +// Therefore, these will require the "ECURVE_SCALAR" scratch space which +// consists of two parts: The self space for the above two functions and the +// common scratch space. These parts SHOULD NOT overlap. Schematically: +// +// |--------------SCALAR---------------------------------------------------| +// |----1ModEl + (n+2)Ecp + 2Int--------||---max(COMMON, COM_MOD(GOrd))---| + +// The scratch space sizes are all calculated by the following function. +// *** Notice that almost all the curve parameters (the exception is the distinguished point) +// must have been initialized before calling this function.
+VOID +SYMCRYPT_CALL +SymCryptShortWeierstrassFillScratchSpaces( _In_ PSYMCRYPT_ECURVE pCurve ) +{ + UINT32 nDigits = SymCryptDigitsFromBits( pCurve->FModBitsize ); + + // + // All the scratch space computations are upper bounded by the SizeofXXX bound (2^19) and + // the SCRATCH_BYTES_FOR_XXX bound (2^24) (see symcrypt_internal.h). + // + // One caveat is SymCryptSizeofEcpointFromCurve and SymCryptSizeofEcpointEx which calculate + // the size of EcPoint with 4 coordinates (each one a modelement of max size 2^17). Thus upper + // bounded by 2^20. + // + // Another is the precomp points computation where the nPrecompPoints are up to + // 2^SYMCRYPT_ECURVE_SW_DEF_WINDOW = 2^6 and the nRecodedDigits are equal to the + // GOrd bitsize < 2^20. + // + // Thus cbScratchScalarMulti is upper bounded by 2^6*2^20 + 2*2^20*2^4 ~ 2^26. + // + + // Common + pCurve->cbScratchCommon = + 8 * pCurve->cbModElement + + 2 * SymCryptSizeofEcpointFromCurve( pCurve ) + + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ); + + // Scalar (Overhead) + pCurve->cbScratchScalar = + pCurve->cbModElement + + 2 * SymCryptSizeofEcpointFromCurve( pCurve ) + + 2 * SymCryptSizeofIntFromDigits( pCurve->GOrdDigits ) + + SYMCRYPT_MAX( pCurve->cbScratchCommon, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->GOrdDigits )); + + // Scalar dependent on precomp points (be careful to align the UINT32 arrays properly) + pCurve->cbScratchScalarMulti = + pCurve->info.sw.nPrecompPoints * SymCryptSizeofEcpointFromCurve( pCurve ) + + ((2*pCurve->info.sw.nRecodedDigits * sizeof(UINT32) + SYMCRYPT_ASYM_ALIGN_VALUE - 1 )/SYMCRYPT_ASYM_ALIGN_VALUE) * SYMCRYPT_ASYM_ALIGN_VALUE; + + // GetSetValue + pCurve->cbScratchGetSetValue = + SymCryptSizeofEcpointEx( pCurve->cbModElement, SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH ) + + 2 * pCurve->cbModElement + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ), + SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( pCurve->FModDigits ) ); + 
+ pCurve->cbScratchGetSetValue = SYMCRYPT_MAX( pCurve->cbScratchGetSetValue, SymCryptSizeofIntFromDigits( nDigits ) ); + + // Eckey + pCurve->cbScratchEckey = + SYMCRYPT_MAX( pCurve->cbModElement + SymCryptSizeofIntFromDigits(SymCryptEcurveDigitsofScalarMultiplier(pCurve)), + SymCryptSizeofEcpointFromCurve( pCurve ) ) + + SYMCRYPT_MAX( pCurve->cbScratchScalar + pCurve->cbScratchScalarMulti, pCurve->cbScratchGetSetValue ); +} + +// +// The following function sets the point to (1:1:0) in Jacobian coordinates. +// +VOID +SYMCRYPT_CALL +SymCryptShortWeierstrassSetZero( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PCSYMCRYPT_MODULUS FMod = pCurve->FMod; + PSYMCRYPT_MODELEMENT peTmp = NULL; + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) ); + + // Getting handle to X + peTmp = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + + // Setting the right value (always 1) + SymCryptModElementSetValueUint32( 1, FMod, peTmp, pbScratch, cbScratch ); + + // Getting handle to Y + peTmp = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + + // Setting the right value (always 1) + SymCryptModElementSetValueUint32( 1, FMod, peTmp, pbScratch, cbScratch ); + + // Getting handle to Z + peTmp = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst ); + + // Setting the right value (always 0) + SymCryptModElementSetValueUint32( 0, pCurve->FMod, peTmp, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptShortWeierstrassSetDistinguished( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( 
SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); + + SymCryptEcpointCopy( pCurve, pCurve->G, poDst ); +} + +// +// The following function checks, as selected by flags (Equal case when flags == 0), if +// - X1*Z2^2 = X2*Z1^2 and Y1*Z2^3 = Y2*Z1^3 (Equal case) +// - X1*Z2^2 = X2*Z1^2 and Y1*Z2^3 = -Y2*Z1^3 (Negative case) +// +// Remark: The case where Z1 = Z2 = 0 is covered above (the zero point +// is equal to its negative). +// +UINT32 +SYMCRYPT_CALL +SymCryptShortWeierstrassIsEqual( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PCSYMCRYPT_MODULUS FMod = pCurve->FMod; + + PSYMCRYPT_MODELEMENT peX1 = NULL; // Pointer to X1 + PSYMCRYPT_MODELEMENT peY1 = NULL; // Pointer to Y1 + PSYMCRYPT_MODELEMENT peZ1 = NULL; // Pointer to Z1 + PSYMCRYPT_MODELEMENT peX2 = NULL; // Pointer to X2 + PSYMCRYPT_MODELEMENT peY2 = NULL; // Pointer to Y2 + PSYMCRYPT_MODELEMENT peZ2 = NULL; // Pointer to Z2 + + UINT32 dResX = 0; + UINT32 dResY = 0; + UINT32 dResYN = 0; + + PSYMCRYPT_MODELEMENT peT[4] = { 0 }; // Temporaries + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc1->pCurve) && SymCryptEcurveIsSame(pCurve, poSrc2->pCurve) ); + SYMCRYPT_ASSERT( (flags & ~(SYMCRYPT_FLAG_ECPOINT_EQUAL|SYMCRYPT_FLAG_ECPOINT_NEG_EQUAL)) == 0 ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) + 4 * pCurve->cbModElement ); + + // Creating temporaries + for (UINT32 i=0; i<4; i++) + { + peT[i] = SymCryptModElementCreate( + pbScratch, + pCurve->cbModElement, + FMod ); + + SYMCRYPT_ASSERT( peT[i] != NULL); + + pbScratch += pCurve->cbModElement; + } + + // Fixing remaining scratch space size + cbScratch -= 4 * pCurve->cbModElement; + + // Getting pointers to the X, Y and Z coordinates of both source points + peX1 =
SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc1 ); + peY1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc1 ); + peZ1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc1 ); + peX2 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc2 ); + peY2 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc2 ); + peZ2 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc2 ); + + // Setting the default flag if flags == 0 + flags |= ( SYMCRYPT_MASK32_ZERO( flags ) & SYMCRYPT_FLAG_ECPOINT_EQUAL ); + + // Calculation + SymCryptModSquare( FMod, peZ1, peT[0], pbScratch, cbScratch ); // T0 := Z1 * Z1 = Z1^2 + SymCryptModSquare( FMod, peZ2, peT[1], pbScratch, cbScratch ); // T1 := Z2 * Z2 = Z2^2 + SymCryptModMul( FMod, peX1, peT[1], peT[2], pbScratch, cbScratch ); // T2 := X1 * T1 = X1*Z2^2 + SymCryptModMul( FMod, peX2, peT[0], peT[3], pbScratch, cbScratch ); // T3 := X2 * T0 = X2*Z1^2 + + dResX = SymCryptModElementIsEqual( FMod, peT[2], peT[3] ); + + SymCryptModMul( FMod, peZ1, peT[0], peT[0], pbScratch, cbScratch ); // T0 := Z1 * T0 = Z1^3 + SymCryptModMul( FMod, peZ2, peT[1], peT[1], pbScratch, cbScratch ); // T1 := Z2 * T1 = Z2^3 + SymCryptModMul( FMod, peY1, peT[1], peT[2], pbScratch, cbScratch ); // T2 := Y1 * T1 = Y1*Z2^3 + SymCryptModMul( FMod, peY2, peT[0], peT[3], pbScratch, cbScratch ); // T3 := Y2 * T0 = Y2*Z1^3 + + dResY = SymCryptModElementIsEqual( FMod, peT[2], peT[3] ); + + SymCryptModNeg( FMod, peT[3], peT[3], pbScratch, cbScratch ); // T3 := -T3 = -Y2*Z1^3 + + dResYN = SymCryptModElementIsEqual( FMod, peT[2], peT[3] ); + + return (SYMCRYPT_MASK32_NONZERO(flags & SYMCRYPT_FLAG_ECPOINT_EQUAL) & dResX & dResY) | + (SYMCRYPT_MASK32_NONZERO(flags & SYMCRYPT_FLAG_ECPOINT_NEG_EQUAL) & dResX & dResYN); +} + +UINT32 +SYMCRYPT_CALL +SymCryptShortWeierstrassIsZero( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PCSYMCRYPT_MODULUS FMod = 
pCurve->FMod; + PSYMCRYPT_MODELEMENT peZ = NULL; // Pointer to Z + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) ); + + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); + + // Getting pointer to Z of the source point + peZ = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc ); + + // The point is zero exactly when its Z coordinate is zero + return SymCryptModElementIsZero( FMod, peZ ); +} + +// +// The following function verifies if the point (X:Y:Z) in Jacobian +// coordinates satisfies the equation Y^2 = X^3 + aXZ^4 + bZ^6. +// +UINT32 +SYMCRYPT_CALL +SymCryptShortWeierstrassOnCurve( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PCSYMCRYPT_MODULUS FMod = pCurve->FMod; + + PSYMCRYPT_MODELEMENT peX = NULL; // Pointer to X + PSYMCRYPT_MODELEMENT peY = NULL; // Pointer to Y + PSYMCRYPT_MODELEMENT peZ = NULL; // Pointer to Z + + PSYMCRYPT_MODELEMENT peT[2] = { 0 }; // Temporaries + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) + 2 * pCurve->cbModElement ); + + // Creating temporaries + for (UINT32 i=0; i<2; i++) + { + peT[i] = SymCryptModElementCreate( + pbScratch, + pCurve->cbModElement, + FMod ); + + SYMCRYPT_ASSERT( peT[i] != NULL); + + pbScratch += pCurve->cbModElement; + } + + // Fixing remaining scratch space size + cbScratch -= 2*pCurve->cbModElement; + + // Getting pointers to coordinates of the source point + peX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + peY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc ); + peZ = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc ); + + // Calculation (in the step comments below, T1 is peT[0] and T2 is peT[1]) + SymCryptModSquare( FMod, peZ, peT[0], pbScratch, cbScratch ); //
T1 := Z * Z = Z^2 + SymCryptModSquare( FMod, peT[0], peT[1], pbScratch, cbScratch ); // T2 := T1 * T1 = Z^4 + SymCryptModMul( FMod, peT[0], peT[1], peT[0], pbScratch, cbScratch ); // T1 := T1 * T2 = Z^6 + + SymCryptModMul( FMod, peT[0], pCurve->B, peT[0], pbScratch, cbScratch ); // T1 := T1 * b = bZ^6 + + SymCryptModMul( FMod, peT[1], peX, peT[1], pbScratch, cbScratch ); // T2 := T2 * X = XZ^4 + SymCryptModMul( FMod, peT[1], pCurve->A, peT[1], pbScratch, cbScratch ); // T2 := T2 * a = aXZ^4 + + SymCryptModAdd( FMod, peT[0], peT[1], peT[1], pbScratch, cbScratch ); // T2 := T1 + T2 = aXZ^4 + bZ^6 + + SymCryptModSquare( FMod, peX, peT[0], pbScratch, cbScratch ); // T1 := X * X = X^2 + SymCryptModMul( FMod, peT[0], peX, peT[0], pbScratch, cbScratch ); // T1 := T1 * X = X^3 + SymCryptModAdd( FMod, peT[0], peT[1], peT[1], pbScratch, cbScratch ); // T2 := T1 + T2 = X^3 + aXZ^4 + bZ^6 + + SymCryptModSquare( FMod, peY, peT[0], pbScratch, cbScratch ); // T1 := Y * Y = Y^2 + + return SymCryptModElementIsEqual( FMod, peT[0], peT[1] ); +} + +// +// based on dbl-2007-bl formula +// but tweaked by saml to +// a) remove overeager conversions from modular multiplication to modular squaring which introduce +// more addition/subtraction. With current implementations (based on montgomery reduction), +// the cost of [a square and an add/sub] is greater than the cost of [a multiplication] +// b) share intermediate results of producing 8YYYY. [add/sub] is ~10% of cost of mul, so reducing +// count of these operation has a real impact +// +// 2Y = 2*Y1 +// 2YY = 2Y*Y1 +// 4YY = 2*2YY +// 8YYYY = 2YY*4YY +// S = X1*4YY +// XX = X1^2 +// ZZ = Z1^2 +// ZZZZ = ZZ^2 +// M = 3*XX+a*ZZZZ +// T = M^2-2*S +// X3 = T +// Y3 = M*(S-T)-8YYYY +// Z3 = Z1*2Y +// +// Total cost: +// 6 Mul (1 by a) +// 4 Sqr +// 2 Add +// 4 Sub +// 3 Dbl +// +// Special Case: +// If the source point is equal to the identity +// point of the curve (i.e. 
Z1 = 0 in Jacobian +// coordinates) then the resulting point has +// Z3 = Z1*2Y1 = 0. Thus, this formula is +// complete (it works for all points). +// +VOID +SYMCRYPT_CALL +SymCryptShortWeierstrassDouble( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PCSYMCRYPT_MODULUS FMod = pCurve->FMod; + PSYMCRYPT_MODELEMENT peT[3] = { 0 }; // Temporaries + + PCSYMCRYPT_MODELEMENT peX1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + PCSYMCRYPT_MODELEMENT peY1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc ); + PCSYMCRYPT_MODELEMENT peZ1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc ); + + PSYMCRYPT_MODELEMENT peX3 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peY3 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peZ3 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst ); + + SYMCRYPT_ASSERT( pCurve->type == SYMCRYPT_INTERNAL_ECURVE_TYPE_SHORT_WEIERSTRASS ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) + 3 * pCurve->cbModElement ); + + UNREFERENCED_PARAMETER( flags ); + + // Creating temporaries + for (UINT32 i=0; i<3; i++) + { + peT[i] = SymCryptModElementCreate( + pbScratch, + pCurve->cbModElement, + FMod ); + + SYMCRYPT_ASSERT( peT[i] != NULL); + + pbScratch += pCurve->cbModElement; + } + + // Fixing remaining scratch space size + cbScratch -= 3*pCurve->cbModElement; + + // Calculate the points + SymCryptModAdd( FMod, peY1, peY1, peT[0], pbScratch, cbScratch ); /* T0 := Y1 + Y1 = 2Y */ + SymCryptModSquare( FMod, peZ1, peT[1], pbScratch, cbScratch ); /* T1 := Z1 * Z1 = ZZ */ + SymCryptModMul( FMod, peT[0], peZ1, peZ3, pbScratch, cbScratch ); /* Z3 := 2Y 
* Z1 = 2YZ */ + + SymCryptModMul( FMod, peY1, peT[0], peY3, pbScratch, cbScratch ); /* Y3 := 2Y * Y1 = 2YY */ + SymCryptModAdd( FMod, peY3, peY3, peT[0], pbScratch, cbScratch ); /* T0 := 2YY + 2YY = 4YY */ + SymCryptModMul( FMod, peT[0], peY3, peY3, pbScratch, cbScratch ); /* Y3 := 2YY * 4YY = 8YYYY */ + + SymCryptModMul( FMod, peT[0], peX1, peT[0], pbScratch, cbScratch ); /* T0 := X1 * 4YY = 4XYY = S */ + + SymCryptModSquare( FMod, peT[1], peT[1], pbScratch, cbScratch ); /* T1 := T1 * T1 = ZZZZ */ + SymCryptModSquare( FMod, peX1, peT[2], pbScratch, cbScratch ); /* T2 := X1 * X1 = XX */ + SymCryptModMul( FMod, peT[1], pCurve->A, peT[1], pbScratch, cbScratch ); /* T1 := T1 * a = a*ZZZZ */ + SymCryptModAdd( FMod, peT[2], peT[1], peT[1], pbScratch, cbScratch ); /* T1 := T2 + T1 = XX + a*ZZZZ */ + SymCryptModAdd( FMod, peT[2], peT[2], peT[2], pbScratch, cbScratch ); /* T2 := T2 + T2 = 2*XX */ + SymCryptModAdd( FMod, peT[0], peT[0], peX3, pbScratch, cbScratch ); /* X3 := 2*S */ + SymCryptModAdd( FMod, peT[2], peT[1], peT[1], pbScratch, cbScratch ); /* T1 := T2 + T1 = 3*XX + a*ZZZZ = M */ + + SymCryptModSquare( FMod, peT[1], peT[2], pbScratch, cbScratch ); /* T2 := M^2 */ + SymCryptModSub( FMod, peT[2], peX3, peX3, pbScratch, cbScratch ); /* X3 := M^2 - 2*S = T */ + + SymCryptModSub( FMod, peT[0], peX3, peT[0], pbScratch, cbScratch ); /* T0 := S - T */ + SymCryptModMul( FMod, peT[1], peT[0], peT[0], pbScratch, cbScratch ); /* T0 := M * (S - T) */ + SymCryptModSub( FMod, peT[0], peY3, peY3, pbScratch, cbScratch ); /* Y3 := M * (S - T) - 8*YYYY */ +} + + +// +// based on dbl-2007-bl / dbl-2001-b formulae +// but tweaked by saml to +// a) remove overeager conversions from modular multiplication to modular squaring which introduce +// more addition/subtraction. With current implementations (based on montgomery reduction), +// the cost of [a square and an add/sub] is greater than the cost of [a multiplication] +// b) share intermediate results of producing 8YYYY. 
[add/sub] is ~10% of cost of mul, so reducing +// count of these operation has a real impact +// c) make use of knowledge that curve has a == -3, so M can be calculated more efficiently +// +// 2Y = 2*Y1 +// 2YY = 2Y*Y1 +// 4YY = 2*2YY +// 8YYYY = 2YY*4YY +// ZZ = Z1^2 +// S = X1*4YY +// M = 3*(X1+ZZ)*(X1-ZZ) = 3*(XX - ZZZZ) +// T = M^2-2*S +// X3 = T +// Y3 = M*(S-T)-8YYYY +// Z3 = 2Y*Z1 +// +// Total cost: +// 6 Mul +// 2 Sqr +// 2 Add +// 4 Sub +// 4 Dbl +// +// Special Case: +// If the source point is equal to the identity +// point of the curve (i.e. Z1 = 0 in Jacobian +// coordinates) then the resulting point has +// Z3 = Z1*2Y1 = 0. Thus, this formula is +// complete (it works for all points). +// +VOID +SYMCRYPT_CALL +SymCryptShortWeierstrassDoubleSpecializedAm3( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PCSYMCRYPT_MODULUS FMod = pCurve->FMod; + PSYMCRYPT_MODELEMENT peT[3] = { 0 }; // Temporaries + + PCSYMCRYPT_MODELEMENT peX1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + PCSYMCRYPT_MODELEMENT peY1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc ); + PCSYMCRYPT_MODELEMENT peZ1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc ); + + PSYMCRYPT_MODELEMENT peX3 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peY3 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peZ3 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst ); + + SYMCRYPT_ASSERT( pCurve->type == SYMCRYPT_INTERNAL_ECURVE_TYPE_SHORT_WEIERSTRASS_AM3 ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) + 3 * pCurve->cbModElement ); + + UNREFERENCED_PARAMETER( flags ); + + // Creating 
temporaries + for (UINT32 i=0; i<3; i++) + { + peT[i] = SymCryptModElementCreate( + pbScratch, + pCurve->cbModElement, + FMod ); + + SYMCRYPT_ASSERT( peT[i] != NULL); + + pbScratch += pCurve->cbModElement; + } + + // Fixing remaining scratch space size + cbScratch -= 3*pCurve->cbModElement; + + // Calculate the points + SymCryptModAdd( FMod, peY1, peY1, peT[0], pbScratch, cbScratch ); /* T0 := Y1 + Y1 = 2Y */ + SymCryptModSquare( FMod, peZ1, peT[1], pbScratch, cbScratch ); /* T1 := Z1 * Z1 = ZZ */ + SymCryptModMul( FMod, peY1, peT[0], peY3, pbScratch, cbScratch ); /* Y3 := 2Y * Y1 = 2YY */ + + SymCryptModMul( FMod, peT[0], peZ1, peZ3, pbScratch, cbScratch ); /* Z3 := 2Y * Z1 = 2YZ */ + + SymCryptModAdd( FMod, peY3, peY3, peT[0], pbScratch, cbScratch ); /* T0 := 2YY + 2YY = 4YY */ + SymCryptModAdd( FMod, peX1, peT[1], peT[2], pbScratch, cbScratch ); /* T2 := X1 + ZZ */ + SymCryptModMul( FMod, peT[0], peY3, peY3, pbScratch, cbScratch ); /* Y3 := 2YY * 4YY = 8YYYY */ + + SymCryptModSub( FMod, peX1, peT[1], peT[1], pbScratch, cbScratch ); /* T1 := X1 - ZZ */ + SymCryptModMul( FMod, peT[0], peX1, peT[0], pbScratch, cbScratch ); /* T0 := X1 * 4YY = 4XYY = S */ + + SymCryptModMul( FMod, peT[2], peT[1], peT[2], pbScratch, cbScratch ); /* T2 := (X1 + ZZ)*(X1 - ZZ) = XX - ZZZZ */ + SymCryptModAdd( FMod, peT[2], peT[2], peT[1], pbScratch, cbScratch ); /* T1 := 2*(XX - ZZZZ) */ + SymCryptModAdd( FMod, peT[0], peT[0], peX3, pbScratch, cbScratch ); /* X3 := 2*S */ + SymCryptModAdd( FMod, peT[1], peT[2], peT[1], pbScratch, cbScratch ); /* T1 := 3*(XX - ZZZZ) = M */ + + SymCryptModSquare( FMod, peT[1], peT[2], pbScratch, cbScratch ); /* T2 := M^2 */ + SymCryptModSub( FMod, peT[2], peX3, peX3, pbScratch, cbScratch ); /* X3 := M^2 - 2*S = T */ + + SymCryptModSub( FMod, peT[0], peX3, peT[0], pbScratch, cbScratch ); /* T0 := S - T */ + SymCryptModMul( FMod, peT[1], peT[0], peT[0], pbScratch, cbScratch ); /* T0 := M * (S - T) */ + SymCryptModSub( FMod, peT[0], peY3, peY3, pbScratch, 
cbScratch ); /* Y3 := M * (S - T) - 8*YYYY */ +} + +// +// based on add-2007-bl formula +// but tweaked by saml to +// remove overeager conversions from modular multiplication to modular squaring which introduce +// more addition/subtraction. +// +// Z1Z1 = Z1^2 +// Z2Z2 = Z2^2 +// U1 = X1*Z2Z2 +// U2 = X2*Z1Z1 +// S1 = Y1*Z2*Z2Z2 +// S2 = Y2*Z1*Z1Z1 +// H = U2-U1 +// 2H = 2*H +// I = (2H)^2 +// J = H*I +// r = 2*(S2-S1) +// V = U1*I +// X3 = r^2-J-2*V +// Y3 = r*(V-X3)-2*S1*J +// Z3 = (Z1*Z2)*2H +// +// Total cost: +// 12 Mul +// 4 Sqr +// 0 Add +// 7 Sub +// 3 Dbl +// +// Special Case: +// If the two source points are opposite (X1 / Z1^2 == X2 / Z2^2), +// then H = U2-U1 = 0. Thus Z3 = 0 and the result is correct. +// +VOID +SYMCRYPT_CALL +SymCryptShortWeierstrassAddDiffNonZero( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PCSYMCRYPT_MODULUS FMod = pCurve->FMod; + + PCSYMCRYPT_MODELEMENT peX1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc1 ); + PCSYMCRYPT_MODELEMENT peY1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc1 ); + PCSYMCRYPT_MODELEMENT peZ1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc1 ); + + PCSYMCRYPT_MODELEMENT peX2 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc2 ); + PCSYMCRYPT_MODELEMENT peY2 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc2 ); + PCSYMCRYPT_MODELEMENT peZ2 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc2 ); + + PSYMCRYPT_MODELEMENT peX3 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peY3 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peZ3 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst ); + + PSYMCRYPT_MODELEMENT peT[7] = { 0 }; // Temporaries + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) ); + 
SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc1->pCurve) && SymCryptEcurveIsSame(pCurve, poSrc2->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) + 7 * pCurve->cbModElement ); + + // Creating temporaries + for (UINT32 i=0; i<7; i++) + { + peT[i] = SymCryptModElementCreate( + pbScratch, + pCurve->cbModElement, + FMod ); + + SYMCRYPT_ASSERT( peT[i] != NULL); + + pbScratch += pCurve->cbModElement; + } + + // Fixing remaining scratch space size + cbScratch -= 7*pCurve->cbModElement; + + // Calculation + + SymCryptModSquare( FMod, peZ1, peT[0], pbScratch, cbScratch ); /* T0 := Z1 * Z1 = Z1Z1 */ + SymCryptModMul( FMod, peZ1, peT[0], peT[1], pbScratch, cbScratch ); /* T1 := Z1*Z1Z1 */ + + SymCryptModSquare( FMod, peZ2, peT[6], pbScratch, cbScratch ); /* T6 := Z2 * Z2 = Z2Z2 */ + SymCryptModMul( FMod, peX1, peT[6], peT[2], pbScratch, cbScratch ); /* T2 := X1 * T6 = X1*Z2Z2 = U1 */ + SymCryptModMul( FMod, peX2, peT[0], peT[3], pbScratch, cbScratch ); /* T3 := X2 * Z1Z1 = U2 */ + SymCryptModSub( FMod, peT[3], peT[2], peT[5], pbScratch, cbScratch ); /* T5 := T3 - T2 = U2 - U1 = H */ + SymCryptModAdd( FMod, peT[5], peT[5], peT[3], pbScratch, cbScratch ); /* T3 := T5 + T5 = 2H */ + + SymCryptModMul( FMod, peZ1, peZ2, peT[4], pbScratch, cbScratch ); /* T4 := Z1 * Z2 */ + + SymCryptModMul( FMod, peZ2, peT[6], peT[6], pbScratch, cbScratch ); /* T6 := Z2 * T6 = Z2*Z2Z2 */ + SymCryptModMul( FMod, peT[4], peT[3], peZ3, pbScratch, cbScratch ); /* Z3 := T4 * T3 = Z1*Z2*2H */ + + SymCryptModMul( FMod, peY1, peT[6], peT[6], pbScratch, cbScratch ); /* T6 := Y1 * T6 = Y1*Z2*Z2Z2 = S1 */ + SymCryptModMul( FMod, peY2, peT[1], peT[4], pbScratch, cbScratch ); /* T4 := Y2*Z1*Z1Z1 = S2 */ + SymCryptModSub( FMod, peT[4], peT[6], peT[4], pbScratch, cbScratch ); /* T4 := T4 - T6 = S2-S1 */ + SymCryptModAdd( FMod, peT[4], peT[4], peT[4], pbScratch, cbScratch ); /* T4 := T4 + T4 = 
2*(S2-S1) = r */ + + SymCryptModSquare( FMod, peT[3], peT[3], pbScratch, cbScratch ); /* T3 := T3 * T3 = (2*H)^2 = I */ + SymCryptModMul( FMod, peT[3], peT[5], peT[5], pbScratch, cbScratch ); /* T5 := T3 * T5 = H*I = J */ + SymCryptModMul( FMod, peT[2], peT[3], peT[3], pbScratch, cbScratch ); /* T3 := T2 * T3 = U1*I = V */ + + SymCryptModSquare( FMod, peT[4], peT[2], pbScratch, cbScratch ); /* T2 := T4 * T4 = r^2 */ + SymCryptModSub( FMod, peT[2], peT[5], peT[2], pbScratch, cbScratch ); /* T2 := T2 - T5 = r^2 - J */ + SymCryptModSub( FMod, peT[2], peT[3], peT[2], pbScratch, cbScratch ); /* T2 := T2 - T3 = r^2 - J - V */ + SymCryptModSub( FMod, peT[2], peT[3], peX3, pbScratch, cbScratch ); /* X3 := T2 - T3 = r^2 - J - 2*V */ + + SymCryptModSub( FMod, peT[3], peX3, peT[3], pbScratch, cbScratch ); /* T3 := T3 - X3 = V - X3 */ + SymCryptModMul( FMod, peT[3], peT[4], peT[3], pbScratch, cbScratch ); /* T3 := T3 * T4 = r*(V-X3) */ + SymCryptModMul( FMod, peT[6], peT[5], peT[6], pbScratch, cbScratch ); /* T6 := T6 * T5 = S1*J */ + SymCryptModAdd( FMod, peT[6], peT[6], peT[6], pbScratch, cbScratch ); /* T6 := T6 + T6 = 2*S1*J */ + SymCryptModSub( FMod, peT[3], peT[6], peY3, pbScratch, cbScratch ); /* Y3 := T3 - T6 = r*(V-X3) - 2*S1*J */ +} + +// +// The following function is a complete **SIDE-CHANNEL-UNSAFE** +// addition of points that detects as fast as possible the special cases +// and merges the two previous calls.
+// +VOID +SYMCRYPT_CALL +SymCryptShortWeierstrassAddSideChannelUnsafe( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PCSYMCRYPT_MODULUS FMod = pCurve->FMod; + + PCSYMCRYPT_MODELEMENT peX1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc1 ); + PCSYMCRYPT_MODELEMENT peY1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc1 ); + PCSYMCRYPT_MODELEMENT peZ1 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc1 ); + + PCSYMCRYPT_MODELEMENT peX2 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc2 ); + PCSYMCRYPT_MODELEMENT peY2 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc2 ); + PCSYMCRYPT_MODELEMENT peZ2 = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc2 ); + + PSYMCRYPT_MODELEMENT peT[8] = { 0 }; // Temporaries + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc1->pCurve) && SymCryptEcurveIsSame(pCurve, poSrc2->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) + 8 * pCurve->cbModElement ); + + // Check if one of the points is zero + if (SymCryptModElementIsZero( FMod, peZ1 )) + { + SymCryptEcpointCopy( pCurve, poSrc2, poDst); + return; + } + + if (SymCryptModElementIsZero( FMod, peZ2 )) + { + SymCryptEcpointCopy( pCurve, poSrc1, poDst); + return; + } + + // Creating temporaries + for (UINT32 i=0; i<8; i++) + { + peT[i] = SymCryptModElementCreate( + pbScratch, + pCurve->cbModElement, + FMod ); + + SYMCRYPT_ASSERT( peT[i] != NULL); + + pbScratch += pCurve->cbModElement; + } + + // Fixing remaining scratch space size + cbScratch -= 8*pCurve->cbModElement; + + // Calculation + + SymCryptModSquare( FMod, peZ1, peT[0], pbScratch, cbScratch ); /* T0 := Z1 * Z1 = Z1Z1 */ + SymCryptModMul( 
FMod, peZ1, peT[0], peT[1], pbScratch, cbScratch ); /* T1 := Z1*Z1Z1 */ + + SymCryptModSquare( FMod, peZ2, peT[6], pbScratch, cbScratch ); /* T6 := Z2 * Z2 = Z2Z2 */ + SymCryptModMul( FMod, peX1, peT[6], peT[2], pbScratch, cbScratch ); /* T2 := X1 * T6 = X1*Z2Z2 = U1 */ + SymCryptModMul( FMod, peX2, peT[0], peT[3], pbScratch, cbScratch ); /* T3 := X2 * Z1Z1 = U2 */ + SymCryptModSub( FMod, peT[3], peT[2], peT[5], pbScratch, cbScratch ); /* T5 := T3 - T2 = U2 - U1 = H */ + + SymCryptModMul( FMod, peY2, peT[1], peT[7], pbScratch, cbScratch ); /* T7 := Y2 * T1 = Y2*Z1*Z1Z1 = S2 */ + SymCryptModMul( FMod, peZ2, peT[6], peT[1], pbScratch, cbScratch ); /* T1 := Z2 * T6 = Z2*Z2Z2 */ + SymCryptModMul( FMod, peY1, peT[1], peT[1], pbScratch, cbScratch ); /* T1 := Y1 * T1 = Y1*Z2*Z2Z2 = S1 */ + SymCryptModSub( FMod, peT[7], peT[1], peT[7], pbScratch, cbScratch ); /* T7 := T7 - T1 = S2-S1 */ + + if (SymCryptModElementIsZero( FMod, peT[5] ) & SymCryptModElementIsZero( FMod, peT[7] )) + { + // Points are equal - run double on poSrc1 + + SymCryptModElementCopy( FMod, peT[0], peT[4] ); /* Move Z1Z1 for later */ + + SymCryptModSquare( FMod, peX1, peT[0], pbScratch, cbScratch ); /* T0 := X1 * X1 = XX */ + SymCryptModSquare( FMod, peY1, peT[3], pbScratch, cbScratch ); /* T3 := Y1 * Y1 = YY */ + SymCryptModSquare( FMod, peT[3], peT[5], pbScratch, cbScratch ); /* T5 := T3 * T3 = YYYY */ + + SymCryptModAdd( FMod, peX1, peT[3], peT[1], pbScratch, cbScratch ); /* T1 := X1 + T3 = X + YY */ + SymCryptModSquare( FMod, peT[1], peT[1], pbScratch, cbScratch ); /* T1 := T1 * T1 = (X + YY)^2 */ + SymCryptModSub( FMod, peT[1], peT[0], peT[1], pbScratch, cbScratch ); /* T1 := T1 - T0 = (X + YY)^2 - XX */ + SymCryptModSub( FMod, peT[1], peT[5], peT[1], pbScratch, cbScratch ); /* T1 := T1 - T5 = (X + YY)^2 - XX - YYYY */ + SymCryptModAdd( FMod, peT[1], peT[1], peT[1], pbScratch, cbScratch ); /* T1 := T1 + T1 = 2*((X + YY)^2 - XX - YYYY) = S */ + + //SymCryptModSquare( FMod, peZ1, peT[4], pbScratch, 
cbScratch ); /* T4 := Z1 * Z1 = ZZ */ + + SymCryptModSquare( FMod, peT[4], peT[2], pbScratch, cbScratch ); /* T2 := T4 * T4 = ZZ^2 */ + SymCryptModMul( FMod, peT[2], pCurve->A, peT[2], pbScratch, cbScratch ); /* T2 := T2 * a = a*ZZ^2 */ + SymCryptModAdd( FMod, peT[2], peT[0], peT[2], pbScratch, cbScratch ); /* T2 := T2 + T0 = XX + a*ZZ^2 */ + SymCryptModAdd( FMod, peT[0], peT[0], peT[0], pbScratch, cbScratch ); /* T0 := T0 + T0 = 2*XX */ + SymCryptModAdd( FMod, peT[2], peT[0], peT[2], pbScratch, cbScratch ); /* T2 := T2 + T0 = 3*XX + a*ZZ^2 = M */ + + SymCryptModSquare( FMod, peT[2], peT[0], pbScratch, cbScratch ); /* T0 := T2 * T2 = M^2 */ + SymCryptModSub( FMod, peT[0], peT[1], peT[0], pbScratch, cbScratch ); /* T0 := T0 - T1 = M^2 - S */ + SymCryptModSub( FMod, peT[0], peT[1], peT[0], pbScratch, cbScratch ); /* T0 := T0 - T1 = M^2 - 2*S = T = X3 */ + + SymCryptModSub( FMod, peT[1], peT[0], peT[1], pbScratch, cbScratch ); /* T1 := T1 - T0 = S - T */ + SymCryptModMul( FMod, peT[2], peT[1], peT[1], pbScratch, cbScratch ); /* T1 := T2 * T1 = M * (S - T) */ + SymCryptModAdd( FMod, peT[5], peT[5], peT[5], pbScratch, cbScratch ); /* T5 := T5 + T5 = 2*YYYY */ + SymCryptModAdd( FMod, peT[5], peT[5], peT[5], pbScratch, cbScratch ); /* T5 := T5 + T5 = 4*YYYY */ + SymCryptModAdd( FMod, peT[5], peT[5], peT[5], pbScratch, cbScratch ); /* T5 := T5 + T5 = 8*YYYY */ + SymCryptModSub( FMod, peT[1], peT[5], peT[1], pbScratch, cbScratch ); /* T1 := T1 - T5 = M * (S - T) - 8*YYYY = Y3 */ + + SymCryptModAdd( FMod, peY1, peZ1, peT[2], pbScratch, cbScratch ); /* T2 := Y1 + Z1 */ + SymCryptModSquare( FMod, peT[2], peT[2], pbScratch, cbScratch ); /* T2 := T2 * T2 = (Y + Z )^2 */ + SymCryptModSub( FMod, peT[2], peT[3], peT[2], pbScratch, cbScratch ); /* T2 := T2 - T3 = (Y + Z )^2 - YY */ + SymCryptModSub( FMod, peT[2], peT[4], peT[2], pbScratch, cbScratch ); /* T2 := T2 - T4 = (Y + Z )^2 - YY - ZZ = Z3 */ + + // Setting the result + SymCryptModElementCopy( FMod, peT[0], 
SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ) ); + SymCryptModElementCopy( FMod, peT[1], SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ) ); + SymCryptModElementCopy( FMod, peT[2], SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst ) ); + } + else + { + // Continue the addition + + SymCryptModAdd( FMod, peZ1, peZ2, peT[4], pbScratch, cbScratch ); /* T4 := Z1 + Z2 */ + SymCryptModSquare( FMod, peT[4], peT[4], pbScratch, cbScratch ); /* T4 := T4 * T4 = (Z1 + Z2)^2 */ + SymCryptModSub( FMod, peT[4], peT[0], peT[4], pbScratch, cbScratch ); /* T4 := T4 - Z1Z1 = (Z1 + Z2)^2 - Z1Z1 */ + SymCryptModSub( FMod, peT[4], peT[6], peT[4], pbScratch, cbScratch ); /* T4 := T4 - T6 = (Z1 + Z2)^2 - Z1Z1 - Z2Z2 */ + SymCryptModMul( FMod, peT[4], peT[5], peT[4], pbScratch, cbScratch ); /* T4 := T4 * T5 = ((Z1 + Z2)^2 - Z1Z1 - Z2Z2)*H = Z3 */ + + SymCryptModAdd( FMod, peT[7], peT[7], peT[7], pbScratch, cbScratch ); /* T7 := T7 + T7 = 2*(S2-S1) = r */ + + SymCryptModAdd( FMod, peT[5], peT[5], peT[3], pbScratch, cbScratch ); /* T3 := T5 + T5 = 2*H */ + SymCryptModSquare( FMod, peT[3], peT[3], pbScratch, cbScratch ); /* T3 := T3 * T3 = (2*H)^2 = I */ + SymCryptModMul( FMod, peT[3], peT[5], peT[5], pbScratch, cbScratch ); /* T5 := T3 * T5 = H*I = J */ + SymCryptModMul( FMod, peT[2], peT[3], peT[3], pbScratch, cbScratch ); /* T3 := T2 * T3 = U1*I = V */ + + SymCryptModSquare( FMod, peT[7], peT[2], pbScratch, cbScratch ); /* T2 := T7 * T7 = r^2 */ + SymCryptModSub( FMod, peT[2], peT[5], peT[2], pbScratch, cbScratch ); /* T2 := T2 - T5 = r^2 - J */ + SymCryptModSub( FMod, peT[2], peT[3], peT[2], pbScratch, cbScratch ); /* T2 := T2 - T3 = r^2 - J - V */ + SymCryptModSub( FMod, peT[2], peT[3], peT[2], pbScratch, cbScratch ); /* T2 := T2 - T3 = r^2 - J - 2*V = X3 */ + + SymCryptModSub( FMod, peT[3], peT[2], peT[3], pbScratch, cbScratch ); /* T3 := T3 - T2 = V - X3 */ + SymCryptModMul( FMod, peT[3], peT[7], peT[3], pbScratch, cbScratch ); /* T3 := T3 * T7 = r*(V-X3) */ + 
SymCryptModMul( FMod, peT[1], peT[5], peT[6], pbScratch, cbScratch ); /* T6 := T1 * T5 = S1*J */ + SymCryptModAdd( FMod, peT[6], peT[6], peT[6], pbScratch, cbScratch ); /* T6 := T6 + T6 = 2*S1*J */ + SymCryptModSub( FMod, peT[3], peT[6], peT[3], pbScratch, cbScratch ); /* T3 := T6 - T3 = r*(V-X3) - 2*S1*J = Y3 */ + + // Setting the result + SymCryptModElementCopy( FMod, peT[2], SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ) ); + SymCryptModElementCopy( FMod, peT[3], SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ) ); + SymCryptModElementCopy( FMod, peT[4], SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst ) ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptShortWeierstrassAdd( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 dSrc1Zero = 0; + UINT32 dSrc2Zero = 0; + UINT32 dSrcEqual = 0; + + // Temporary points + PSYMCRYPT_ECPOINT poQ0 = NULL; + PSYMCRYPT_ECPOINT poQ1 = NULL; + + SIZE_T cbEcpoint = SymCryptSizeofEcpointFromCurve( pCurve ); + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc1->pCurve) && SymCryptEcurveIsSame(pCurve, poSrc2->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ); // We will need the entire scratch space + + SYMCRYPT_ASSERT( cbScratch > 2*cbEcpoint ); + + if ((flags & SYMCRYPT_FLAG_DATA_PUBLIC) != 0) + { + SymCryptShortWeierstrassAddSideChannelUnsafe( pCurve, poSrc1, poSrc2, poDst, pbScratch, cbScratch ); + } + else + { + // Creating temporary points + poQ0 = SymCryptEcpointCreate( pbScratch, cbEcpoint, pCurve ); + SYMCRYPT_ASSERT( poQ0 != NULL); + pbScratch += cbEcpoint; + + poQ1 = SymCryptEcpointCreate( pbScratch, cbEcpoint, pCurve ); + SYMCRYPT_ASSERT( 
poQ1 != NULL); + pbScratch += cbEcpoint; + + // Fixing remaining scratch space size + cbScratch -= 2*cbEcpoint; + + // Calculate the masks + dSrc1Zero = SymCryptShortWeierstrassIsZero( pCurve, poSrc1, pbScratch, cbScratch ); + dSrc2Zero = SymCryptShortWeierstrassIsZero( pCurve, poSrc2, pbScratch, cbScratch ); + dSrcEqual = SymCryptShortWeierstrassIsEqual( pCurve, poSrc1, poSrc2, SYMCRYPT_FLAG_ECPOINT_EQUAL, pbScratch, cbScratch ); + + // Side-channel safe computations + SymCryptShortWeierstrassAddDiffNonZero( pCurve, poSrc1, poSrc2, poQ0, pbScratch, cbScratch ); // This covers the cases where Src1 != Src2 or Src1 = -Src2 + + SymCryptEcpointDouble( pCurve, poSrc1, poQ1, 0, pbScratch, cbScratch ); // Dispatch to Double function; enables type assertion on SymCryptShortWeierstrassDouble to be specific + SymCryptEcpointMaskedCopy( pCurve, poQ1, poQ0, dSrcEqual ); // (Masked) copy if the points are equal + + SymCryptEcpointMaskedCopy( pCurve, poSrc1, poQ0, dSrc2Zero ); // (Masked) copy if Src2 = 0 + SymCryptEcpointMaskedCopy( pCurve, poSrc2, poQ0, dSrc1Zero ); // (Masked) copy if Src1 = 0 + + SymCryptEcpointCopy( pCurve, poQ0, poDst ); // Copy the final result to destination + } +} + +VOID +SYMCRYPT_CALL +SymCryptShortWeierstrassNegate( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Inout_ PSYMCRYPT_ECPOINT poSrc, + UINT32 mask, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PCSYMCRYPT_MODULUS FMod = pCurve->FMod; + PSYMCRYPT_MODELEMENT peY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc); + + PSYMCRYPT_MODELEMENT peTmp = NULL; + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_SHORT_WEIERSTRASS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) + pCurve->cbModElement); + + peTmp = SymCryptModElementCreate( + pbScratch, + pCurve->cbModElement, + FMod ); + SYMCRYPT_ASSERT( peTmp != NULL); + + pbScratch += 
//
// SymCryptTwistedEdwardsFillScratchSpaces
//
// Computes the scratch-space sizes stored in the curve object
// (cbScratchCommon, cbScratchScalar, cbScratchScalarMulti,
// cbScratchGetSetValue, cbScratchEckey) from the field and group-order
// sizes that are already present in pCurve.
//
// The assignments are order dependent: cbScratchScalar reads cbScratchCommon,
// and cbScratchEckey reads cbScratchScalar, cbScratchScalarMulti and
// cbScratchGetSetValue.
//
VOID
SYMCRYPT_CALL
SymCryptTwistedEdwardsFillScratchSpaces( _In_ PSYMCRYPT_ECURVE pCurve )
{
    UINT32 nDigits = SymCryptDigitsFromBits( pCurve->FModBitsize );
    UINT32 cbModElement = pCurve->cbModElement;
    UINT32 nDigitsFieldLength = pCurve->FModDigits;

    //
    // All the scratch space computations are upper bounded by the SizeofXXX bound (2^19) and
    // the SCRATCH_BYTES_FOR_XXX bound (2^24) (see symcrypt_internal.h).
    //
    // One caveat is SymCryptSizeofEcpointFromCurve and SymCryptSizeofEcpointEx which calculate
    // the size of EcPoint with 4 coordinates (each one a modelement of max size 2^17). Thus upper
    // bounded by 2^20.
    //
    // Another is the precomp points computation where the nPrecompPoints are up to
    // 2^SYMCRYPT_ECURVE_SW_DEF_WINDOW = 2^6 and the nRecodedDigits are equal to the
    // GOrd bitsize < 2^20.
    //
    // Thus cbScratchScalarMulti is upper bounded by 2^6*2^20 + 2*2^20*2^4 ~ 2^26.
    //

    // Eight field elements plus the scratch of one modular operation
    // (the Double and Add routines in this file carve out 8 temporaries).
    pCurve->cbScratchCommon = 8 * cbModElement + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits );

    // One field element, two points, two integers of GOrdDigits digits, plus the
    // larger of the common curve scratch and the modular scratch for GOrdDigits.
    pCurve->cbScratchScalar =
        (pCurve->cbModElement) +
        2 * SymCryptSizeofEcpointFromCurve( pCurve ) +
        2 * SymCryptSizeofIntFromDigits( pCurve->GOrdDigits ) +
        SYMCRYPT_MAX( pCurve->cbScratchCommon, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->GOrdDigits ));

    // nPrecompPoints points plus 2*nRecodedDigits UINT32 values, the latter rounded
    // up to a multiple of SYMCRYPT_ASYM_ALIGN_VALUE.
    // NOTE(review): the counts are read from the info.sw member even though this is
    // the twisted Edwards code path - presumably the member is shared; confirm.
    pCurve->cbScratchScalarMulti =
        pCurve->info.sw.nPrecompPoints * SymCryptSizeofEcpointFromCurve( pCurve ) +
        ((2*pCurve->info.sw.nRecodedDigits * sizeof(UINT32) + SYMCRYPT_ASYM_ALIGN_VALUE - 1 )/SYMCRYPT_ASYM_ALIGN_VALUE) * SYMCRYPT_ASYM_ALIGN_VALUE;

    // One point of the maximum format length, two field elements, plus the larger
    // of the common modular scratch and the modular-inverse scratch.
    pCurve->cbScratchGetSetValue =
        SymCryptSizeofEcpointEx(cbModElement, SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH) +
        2 * cbModElement +
        SYMCRYPT_MAX(SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS(nDigitsFieldLength),
                     SYMCRYPT_SCRATCH_BYTES_FOR_MODINV(nDigitsFieldLength));

    // ... and never smaller than one integer of nDigits digits
    pCurve->cbScratchGetSetValue = SYMCRYPT_MAX( pCurve->cbScratchGetSetValue, SymCryptSizeofIntFromDigits( nDigits ) );

    // Key operations: the larger of (field element + scalar-sized integer) and one point,
    // plus the larger of the scalar-multiplication scratch and the get/set-value scratch.
    pCurve->cbScratchEckey =
        SYMCRYPT_MAX( pCurve->cbModElement + SymCryptSizeofIntFromDigits(SymCryptEcurveDigitsofScalarMultiplier(pCurve)),
                      SymCryptSizeofEcpointFromCurve( pCurve ) ) +
        SYMCRYPT_MAX( pCurve->cbScratchScalar + pCurve->cbScratchScalarMulti, pCurve->cbScratchGetSetValue );
}

//
// SymCryptTwistedEdwardsSetDistinguished
//
// Sets poDst to the distinguished point of the curve by copying pCurve->G.
// The scratch buffer is not used.
//
VOID
SYMCRYPT_CALL
SymCryptTwistedEdwardsSetDistinguished(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch )
{
    SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) );
    SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poDst->pCurve) );

    UNREFERENCED_PARAMETER( pbScratch );
    UNREFERENCED_PARAMETER( cbScratch );

    SymCryptEcpointCopy( pCurve, pCurve->G, poDst );
}
+ SIZE_T cbScratch) +{ + PSYMCRYPT_MODULUS pmMod = pCurve->FMod; + UINT32 dResX = 0, dResY = 0; + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) ); + + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); + + PSYMCRYPT_MODELEMENT peSrcX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + PSYMCRYPT_MODELEMENT peSrcY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc ); + PSYMCRYPT_MODELEMENT peSrcZ = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc ); + + dResX = SymCryptModElementIsZero( pmMod, peSrcX ); + dResY = SymCryptModElementIsEqual( pmMod, peSrcY, peSrcZ ); + + return ( dResX & dResY ); +} + +// +// Verify that +// a * x^2 + y^2 = 1 + d * x^2 * y^2 +// x = X/Z, y = Y/Z, +// To avoid mod inv calculation which is expensive, +// we verify Z^2(aX^2 + Y^2) = Z^4 + d * X^2 * Y^2 +// +UINT32 +SYMCRYPT_CALL +SymCryptTwistedEdwardsOnCurve( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch) +{ + PSYMCRYPT_MODELEMENT peTemp[4]; + PSYMCRYPT_MODULUS pmMod = pCurve->FMod; + SIZE_T nBytes; + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ); + + nBytes = SymCryptSizeofModElementFromModulus( pmMod ); + + SYMCRYPT_ASSERT( cbScratch >= 4*nBytes ); + + for (UINT32 i = 0; i < 4; ++i) + { + peTemp[i] = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + cbScratch -= nBytes; + } + + PSYMCRYPT_MODELEMENT peSrcX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + PSYMCRYPT_MODELEMENT peSrcY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc ); + PSYMCRYPT_MODELEMENT peSrcZ = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc ); + + // 
peTemp[0] = X^2 + SymCryptModSquare( pmMod, peSrcX, peTemp[0], pbScratch, cbScratch); + + // peTemp[1] = Y^2 + SymCryptModSquare( pmMod, peSrcY, peTemp[1], pbScratch, cbScratch); + + // peTemp[2] = Z^2 + SymCryptModSquare( pmMod, peSrcZ, peTemp[2], pbScratch, cbScratch); + + // peTemp[3] = a * X^2 + SymCryptModMul( pmMod, pCurve->A, peTemp[0], peTemp[3], pbScratch, cbScratch ); + + // peTemp[3] = a * X^2 + Y^2 + SymCryptModAdd( pmMod, peTemp[3], peTemp[1], peTemp[3], pbScratch, cbScratch ); + + // peTemp[3] = Z^2 (a * X^2 + Y^2) + SymCryptModMul( pmMod, peTemp[3], peTemp[2], peTemp[3], pbScratch, cbScratch ); + + // peTemp[1] = X^2 * Y^2 + SymCryptModMul( pmMod, peTemp[0], peTemp[1], peTemp[1], pbScratch, cbScratch ); + + // peTemp[1] = d * X^2 *Y^2 + SymCryptModMul( pmMod, pCurve->B, peTemp[1], peTemp[1], pbScratch, cbScratch ); + + // peTemp[2] = Z^4 + SymCryptModMul( pmMod, peTemp[2], peTemp[2], peTemp[2], pbScratch, cbScratch ); + + // peTemp[1] = Z^4 + d * X^2 * Y^2 + SymCryptModAdd( pmMod, peTemp[2], peTemp[1], peTemp[1], pbScratch, cbScratch ); + + return SymCryptModElementIsEqual( pmMod, peTemp[1], peTemp[3] ); +} + +// +// Point doubling: dbl-2008-hwcd, 5Mul + 4Square + 2Add + 5Sub +// +// poDst (X, Y, Z, T) = 2 * poSrc(X, Y, Z, T) +// 1. A = X1 ^ 2 +// 2. B = Y1 ^ 2 +// 3. C = 2 * Z1 ^ 2 +// 4. D = a * A +// 5. E = (X1 + Y1) ^ 2 - A - B +// 6. G = D + B +// 7. F = G - C +// 8. H = D - B +// 9. X3 = E * F +// 10. Y3 = G * H +// 11. T3 = E * H +// 12. 
Z3 = F * G +// +VOID +SYMCRYPT_CALL +SymCryptTwistedEdwardsDouble( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch) +{ + PSYMCRYPT_MODELEMENT peTemp[8]; + PSYMCRYPT_MODULUS pmMod = pCurve->FMod; + SIZE_T nBytes; + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ); + + UNREFERENCED_PARAMETER( flags ); + + nBytes = SymCryptSizeofModElementFromModulus( pmMod ); + + SYMCRYPT_ASSERT( cbScratch >= 8*nBytes ); + + for (UINT32 i = 0; i < 8; ++i) + { + peTemp[i] = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + cbScratch -= nBytes; + } + + PSYMCRYPT_MODELEMENT peSrcX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + PSYMCRYPT_MODELEMENT peSrcY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc ); + PSYMCRYPT_MODELEMENT peSrcZ = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc ); + + PSYMCRYPT_MODELEMENT peDstX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peDstY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peDstZ = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peDstT = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 3, pCurve, poDst ); + + PSYMCRYPT_MODELEMENT peA = peTemp[0]; + PSYMCRYPT_MODELEMENT peB = peTemp[1]; + PSYMCRYPT_MODELEMENT peC = peTemp[2]; + PSYMCRYPT_MODELEMENT peD = peTemp[3]; + PSYMCRYPT_MODELEMENT peE = peTemp[4]; + PSYMCRYPT_MODELEMENT peF = peTemp[5]; + PSYMCRYPT_MODELEMENT peG = peTemp[6]; + PSYMCRYPT_MODELEMENT peH = peTemp[7]; + + + // A = X1^2 + SymCryptModSquare( pmMod, peSrcX, peA, pbScratch, cbScratch ); + + // B = Y1^2 + 
SymCryptModSquare( pmMod, peSrcY, peB, pbScratch, cbScratch ); + + // C1 = Z1^2 + SymCryptModSquare( pmMod, peSrcZ, peC, pbScratch, cbScratch ); + + // C = C1 + C1 = Z1^2 + Z1^2 = 2 * Z1^2 + SymCryptModAdd( pmMod, peC, peC, peC, pbScratch, cbScratch ); + + // D = a * A + SymCryptModMul( pmMod, pCurve->A, peA, peD, pbScratch, cbScratch ); + + // E1 = X1 + Y1 + SymCryptModAdd( pmMod, peSrcX, peSrcY, peE, pbScratch, cbScratch ); + + // E2 = E1^2 = (X1 + Y1)^2 + SymCryptModSquare( pmMod, peE, peE, pbScratch, cbScratch ); + + // E3 = E2 - A = (X1 + Y1)^2 - A + SymCryptModSub( pmMod, peE, peA, peE, pbScratch, cbScratch ); + + // E = E3 - B = (X1 + Y1)^2 - A - B + SymCryptModSub( pmMod, peE, peB, peE, pbScratch, cbScratch ); + + // G = D + B + SymCryptModAdd( pmMod, peD, peB, peG, pbScratch, cbScratch ); + + // F = G - C + SymCryptModSub( pmMod, peG, peC, peF, pbScratch, cbScratch ); + + // H = D - B + SymCryptModSub( pmMod, peD, peB, peH, pbScratch, cbScratch ); + + // X3 = E * F + SymCryptModMul( pmMod, peE, peF, peDstX, pbScratch, cbScratch ); + + // Y3 = G * H + SymCryptModMul( pmMod, peG, peH, peDstY, pbScratch, cbScratch ); + + // T3 = E * H + SymCryptModMul( pmMod, peE, peH, peDstT, pbScratch, cbScratch ); + + // Z3 = F * G + SymCryptModMul( pmMod, peF, peG, peDstZ, pbScratch, cbScratch ); +} + + +// +// Point addition: add-2008-hwcd 11Mul + 3add + 4sub +// +// poDst(X, Y, Z, T) = poSrc(X, Y, Z, T) + poSrc2(X, Y, Z, T) +// 1. A = X1 * X2 +// 2. B = Y1 * Y2 +// 3. C = d * T1 * T2 +// 4. D = Z1 * Z2 +// 5. E = (X1 + Y1) * (X2 + Y2) - A - B +// 6. F = D - C +// 7. G = D + C +// 8. H = B - a * A +// 9. X3 = E * F +// 10. Y3 = G * H +// 11. T3 = E * H +// 12. 
Z3 = F * G +// +VOID +SYMCRYPT_CALL +SymCryptTwistedEdwardsAdd( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PSYMCRYPT_MODELEMENT peTemp[8]; + PSYMCRYPT_MODULUS pmMod = pCurve->FMod; + SIZE_T nBytes; + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc1->pCurve) && SymCryptEcurveIsSame(pCurve, poSrc2->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ); + + UNREFERENCED_PARAMETER( flags ); + + nBytes = SymCryptSizeofModElementFromModulus( pmMod ); + + SYMCRYPT_ASSERT( cbScratch >= 8*nBytes ); + + for (UINT32 i = 0; i < 8; ++i) + { + peTemp[i] = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + cbScratch -= nBytes; + } + + PSYMCRYPT_MODELEMENT peSrc1X = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc1 ); + PSYMCRYPT_MODELEMENT peSrc1Y = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc1 ); + PSYMCRYPT_MODELEMENT peSrc1Z = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc1 ); + PSYMCRYPT_MODELEMENT peSrc1T = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 3, pCurve, poSrc1 ); + + PSYMCRYPT_MODELEMENT peSrc2X = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc2 ); + PSYMCRYPT_MODELEMENT peSrc2Y = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc2 ); + PSYMCRYPT_MODELEMENT peSrc2Z = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc2 ); + PSYMCRYPT_MODELEMENT peSrc2T = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 3, pCurve, poSrc2 ); + + PSYMCRYPT_MODELEMENT peDstX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peDstY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peDstZ = 
SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peDstT = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 3, pCurve, poDst ); + + PSYMCRYPT_MODELEMENT peA = peTemp[0]; + PSYMCRYPT_MODELEMENT peB = peTemp[1]; + PSYMCRYPT_MODELEMENT peC = peTemp[2]; + PSYMCRYPT_MODELEMENT peD = peTemp[3]; + PSYMCRYPT_MODELEMENT peE = peTemp[4]; + PSYMCRYPT_MODELEMENT peF = peTemp[5]; + PSYMCRYPT_MODELEMENT peG = peTemp[6]; + PSYMCRYPT_MODELEMENT peH = peTemp[7]; + + // A = X1 * X2 + SymCryptModMul( pmMod, peSrc1X, peSrc2X, peA, pbScratch, cbScratch ); + + // B = Y1 * Y2 + SymCryptModMul( pmMod, peSrc1Y, peSrc2Y, peB, pbScratch, cbScratch ); + + // C1 = T1 * T2 + SymCryptModMul( pmMod, peSrc1T, peSrc2T, peC, pbScratch, cbScratch ); + + // C = d * C1 = d * T1 * T2 + SymCryptModMul( pmMod, pCurve->B, peC, peC, pbScratch, cbScratch ); + + // D = Z1 * Z2 + SymCryptModMul( pmMod, peSrc1Z, peSrc2Z, peD, pbScratch, cbScratch ); + + // E1 = X1 + Y1 + SymCryptModAdd( pmMod, peSrc1X, peSrc1Y, peE, pbScratch, cbScratch ); + + // E2 = X2 + Y2 + SymCryptModAdd( pmMod, peSrc2X, peSrc2Y, peF, pbScratch, cbScratch ); + + // E = E * F + SymCryptModMul( pmMod, peE, peF, peE, pbScratch, cbScratch ); + + // E = E - A + SymCryptModSub( pmMod, peE, peA, peE, pbScratch, cbScratch ); + + // E = E - B + SymCryptModSub( pmMod, peE, peB, peE, pbScratch, cbScratch ); + + // F = D - C + SymCryptModSub( pmMod, peD, peC, peF, pbScratch, cbScratch ); + + // G = D + C + SymCryptModAdd( pmMod, peD, peC, peG, pbScratch, cbScratch ); + + // H = a * A + SymCryptModMul( pmMod, pCurve->A, peA, peH, pbScratch, cbScratch ); + + // H = B - a * A + SymCryptModSub( pmMod, peB, peH, peH, pbScratch, cbScratch ); + + // X3 = E * F + SymCryptModMul( pmMod, peE, peF, peDstX, pbScratch, cbScratch ); + + // Y3 = G * H + SymCryptModMul( pmMod, peG, peH, peDstY, pbScratch, cbScratch ); + + // T3 = E * H + SymCryptModMul( pmMod, peE, peH, peDstT, pbScratch, cbScratch ); + + // Y3 = F * G + SymCryptModMul( pmMod, 
peF, peG, peDstZ, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptTwistedEdwardsAddDiffNonZero( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_(cbScratch) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptTwistedEdwardsAdd( pCurve, poSrc1, poSrc2, poDst, 0, pbScratch, cbScratch ); +} + +// +// Verify poSrc1(X1, Y1, Z1, T1) = poSrc2(X2, Y2, Z2, T2) +// To avoid ModInv for 1/Z, we do +// X1 * Z2 = X2 * Z1, and +// Y1 * Z2 = Y2 * Z1 +// +// This function also do poSrc1 = -1 * poSrc check as flags indicates +// +UINT32 +SYMCRYPT_CALL +SymCryptTwistedEdwardsIsEqual( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc1, + _In_ PCSYMCRYPT_ECPOINT poSrc2, + UINT32 flags, + _Out_writes_bytes_(cbScratch) + PBYTE pbScratch, + SIZE_T cbScratch) +{ + PSYMCRYPT_MODELEMENT peTemp[2]; + PSYMCRYPT_MODELEMENT peSrc1X, peSrc1Y, peSrc1Z; + PSYMCRYPT_MODELEMENT peSrc2X, peSrc2Y, peSrc2Z; + PSYMCRYPT_MODULUS pmMod = pCurve->FMod; + SIZE_T nBytes; + UINT32 dResX = 0; + UINT32 dResXN = 0; + UINT32 dResY = 0; + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc1->pCurve) && SymCryptEcurveIsSame(pCurve, poSrc2->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ); + + nBytes = SymCryptSizeofModElementFromModulus( pmMod ); + + SYMCRYPT_ASSERT( cbScratch >= 2*nBytes ); + + for (UINT32 i = 0; i < 2; ++i) + { + peTemp[i] = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + cbScratch -= nBytes; + } + + peSrc1X = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc1 ); + peSrc1Y = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc1 ); + peSrc1Z = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc1 ); + + peSrc2X = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc2 ); + peSrc2Y = 
SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc2 ); + peSrc2Z = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc2 ); + + // Setting the default flag if flags == 0 + flags |= (SYMCRYPT_MASK32_ZERO(flags) & SYMCRYPT_FLAG_ECPOINT_EQUAL); + + // peTemp[0] = X1 * Z2 + SymCryptModMul( pmMod, peSrc1X, peSrc2Z, peTemp[0], pbScratch, cbScratch ); + + // peTemp[1] = X2 * Z1 + SymCryptModMul( pmMod, peSrc2X, peSrc1Z, peTemp[1], pbScratch, cbScratch ); + + dResX = SymCryptModElementIsEqual( pmMod, peTemp[0], peTemp[1] ); + + // Neg peTemp[1] + SymCryptModNeg(pmMod, peTemp[1], peTemp[1], pbScratch, cbScratch); + dResXN = SymCryptModElementIsEqual(pmMod, peTemp[0], peTemp[1]); + + // peTemp[0] = Y1 * Z2 + SymCryptModMul( pmMod, peSrc1Y, peSrc2Z, peTemp[0], pbScratch, cbScratch ); + + // peTemp[1] = Y2 * Z1 + SymCryptModMul( pmMod, peSrc2Y, peSrc1Z, peTemp[1], pbScratch, cbScratch ); + + dResY = SymCryptModElementIsEqual( pmMod, peTemp[0], peTemp[1] ); + + return (SYMCRYPT_MASK32_NONZERO( flags & SYMCRYPT_FLAG_ECPOINT_EQUAL ) & dResX & dResY ) | + (SYMCRYPT_MASK32_NONZERO( flags & SYMCRYPT_FLAG_ECPOINT_NEG_EQUAL ) & dResXN & dResY ); +} + +VOID +SYMCRYPT_CALL +SymCryptTwistedEdwardsSetZero( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_ECPOINT poDst, + _Out_writes_bytes_(cbScratch) + PBYTE pbScratch, + SIZE_T cbScratch) +{ + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_COMMON_ECURVE_OPERATIONS( pCurve ) ); + + PSYMCRYPT_MODULUS pmMod = pCurve->FMod; + + PSYMCRYPT_MODELEMENT peDstX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peDstY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peDstZ = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst ); + PSYMCRYPT_MODELEMENT peDstT = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 3, pCurve, poDst ); + + 
SymCryptModElementSetValueUint32( 0, pmMod, peDstX, pbScratch, cbScratch ); + SymCryptModElementSetValueUint32( 1, pmMod, peDstY, pbScratch, cbScratch ); + SymCryptModElementSetValueUint32( 1, pmMod, peDstZ, pbScratch, cbScratch ); + SymCryptModElementSetValueUint32( 0, pmMod, peDstT, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptTwistedEdwardsNegate( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Inout_ PSYMCRYPT_ECPOINT poSrc, + UINT32 mask, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PCSYMCRYPT_MODULUS FMod = pCurve->FMod; + PSYMCRYPT_MODELEMENT peX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0,pCurve, poSrc); + PSYMCRYPT_MODELEMENT peT = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 3,pCurve, poSrc); + + PSYMCRYPT_MODELEMENT peTmp = NULL; + + SYMCRYPT_ASSERT( SYMCRYPT_CURVE_IS_TWISTED_EDWARDS_TYPE(pCurve) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ) + pCurve->cbModElement ); + + peTmp = SymCryptModElementCreate( + pbScratch, + pCurve->cbModElement, + FMod ); + SYMCRYPT_ASSERT( peTmp != NULL); + + pbScratch += pCurve->cbModElement; + cbScratch -= pCurve->cbModElement; + + SymCryptModNeg( FMod, peX, peTmp, pbScratch, cbScratch ); + SymCryptModElementMaskedCopy( FMod, peTmp, peX, mask ); + + SymCryptModNeg( FMod, peT, peTmp, pbScratch, cbScratch ); + SymCryptModElementMaskedCopy( FMod, peTmp, peT, mask ); +} diff --git a/libs/symcrypt/lib/eckey.c b/libs/symcrypt/lib/eckey.c new file mode 100644 index 00000000000..0e05518614e --- /dev/null +++ b/libs/symcrypt/lib/eckey.c @@ -0,0 +1,996 @@ +// +// eckey.c Functions for the ECKEY object +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// +// + +#include "precomp.h" + +PSYMCRYPT_ECKEY +SYMCRYPT_CALL +SymCryptEckeyAllocate( _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + PVOID p; + SIZE_T cb; + PSYMCRYPT_ECKEY res = NULL; + + cb = SymCryptSizeofEckeyFromCurve( pCurve ); + + p = SymCryptCallbackAlloc( cb ); + + if ( p==NULL ) + { + goto cleanup; + } + + res = SymCryptEckeyCreate( p, cb, pCurve ); + +cleanup: + return res; +} + +VOID +SYMCRYPT_CALL +SymCryptEckeyFree( _Out_ PSYMCRYPT_ECKEY pkObj ) +{ + SYMCRYPT_CHECK_MAGIC( pkObj ); + SymCryptEckeyWipe( pkObj ); + SymCryptCallbackFree( pkObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofEckeyFromCurve( _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // - SymCryptSizeofEcpointFromCurve outputs the size of up to 4 modelements + some overhead + // Thus the following calculation does not overflow the result. + // + return sizeof(SYMCRYPT_ECKEY) + SymCryptSizeofEcpointFromCurve( pCurve ) + SymCryptSizeofIntFromDigits(SymCryptEcurveDigitsofScalarMultiplier(pCurve)); +} + +PSYMCRYPT_ECKEY +SYMCRYPT_CALL +SymCryptEckeyCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + PCSYMCRYPT_ECURVE pCurve ) +{ + PSYMCRYPT_ECKEY pkObj = NULL; + UINT32 privateKeyDigits = SymCryptEcurveDigitsofScalarMultiplier(pCurve); + + SIZE_T cbPublicKey = SymCryptSizeofEcpointFromCurve( pCurve ); + SIZE_T cbPrivateKey = SymCryptSizeofIntFromDigits( privateKeyDigits ); + + UNREFERENCED_PARAMETER( cbBuffer ); // only referenced in ASSERTs... 
+ + SYMCRYPT_ASSERT( pCurve != NULL ); + SYMCRYPT_ASSERT( cbBuffer >= SymCryptSizeofEckeyFromCurve( pCurve ) ); + + SYMCRYPT_ASSERT( cbBuffer >= sizeof(SYMCRYPT_ECKEY) + + cbPublicKey + + cbPrivateKey ); + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + + pkObj = (PSYMCRYPT_ECKEY) pbBuffer; + + pkObj->fAlgorithmInfo = 0; + pkObj->hasPrivateKey = FALSE; + pkObj->pCurve = pCurve; + + pkObj->poPublicKey = SymCryptEcpointCreate( + pbBuffer + sizeof(SYMCRYPT_ECKEY), + cbPublicKey, + pCurve ); + SYMCRYPT_ASSERT( pkObj->poPublicKey != NULL ); + + pkObj->piPrivateKey = SymCryptIntCreate( + pbBuffer + sizeof(SYMCRYPT_ECKEY) + cbPublicKey, + cbPrivateKey, + privateKeyDigits ); + SYMCRYPT_ASSERT( pkObj->piPrivateKey ); + + // Setting the magic + SYMCRYPT_SET_MAGIC( pkObj ); + + return pkObj; +} + +VOID +SYMCRYPT_CALL +SymCryptEckeyWipePrivateState( + _Inout_ PSYMCRYPT_ECKEY pkEckey ) +{ + SymCryptIntSetValueUint32( 0, pkEckey->piPrivateKey ); + pkEckey->hasPrivateKey = FALSE; +} + +VOID +SYMCRYPT_CALL +SymCryptEckeyWipe( _Out_ PSYMCRYPT_ECKEY pkDst ) +{ + // Wipe the whole structure in one go. + SymCryptWipe( pkDst, SymCryptSizeofEckeyFromCurve( pkDst->pCurve ) ); +} + +VOID +SymCryptEckeyCopy( + _In_ PCSYMCRYPT_ECKEY pkSrc, + _Out_ PSYMCRYPT_ECKEY pkDst ) +{ + // + // in-place copy is somewhat common... 
+ // + if( pkSrc != pkDst ) + { + // Copy the fAlgorithmInfo flags + pkDst->fAlgorithmInfo = pkSrc->fAlgorithmInfo; + + // Copy the hasPrivateKey flag + pkDst->hasPrivateKey = pkSrc->hasPrivateKey; + + // Copy the public key + SymCryptEcpointCopy( pkSrc->pCurve, pkSrc->poPublicKey, pkDst->poPublicKey ); + + // Copy the private key + SymCryptIntCopy( pkSrc->piPrivateKey, pkDst->piPrivateKey ); + } +} + +UINT32 +SYMCRYPT_CALL +SymCryptEckeySizeofPublicKey( + _In_ PCSYMCRYPT_ECKEY pkEckey, + _In_ SYMCRYPT_ECPOINT_FORMAT ecPointFormat ) +{ + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // - SymCryptEcpointFormatNumberofElements returns up to 4 elements. + // + // Thus the following calculation does not overflow cbScratch. + // + return SymCryptEcpointFormatNumberofElements[ecPointFormat] * SymCryptEcurveSizeofFieldElement( pkEckey->pCurve ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptEckeySizeofPrivateKey( _In_ PCSYMCRYPT_ECKEY pkEckey ) +{ + return SymCryptEcurveSizeofScalarMultiplier( pkEckey->pCurve ); +} + +BOOLEAN +SYMCRYPT_CALL +SymCryptEckeyHasPrivateKey( _In_ PCSYMCRYPT_ECKEY pkEckey ) +{ + return pkEckey->hasPrivateKey; +} + +#define SYMCRYPT_FLAG_ECKEY_PUBLIC_KEY_ORDER_VALIDATION (0x1) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEckeyPerformPublicKeyValidation( + _In_ PCSYMCRYPT_ECKEY pEckey, + _In_ UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PCSYMCRYPT_ECURVE pCurve = pEckey->pCurve; + + PSYMCRYPT_ECPOINT poNPub = NULL; + UINT32 cbNPub = SymCryptSizeofEcpointFromCurve( pCurve ); + + // This is an excessive amount of space to require, but all callers can currently provide it, and it's easy to phrase + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_ECKEY_ECURVE_OPERATIONS( pCurve ) ); + + SYMCRYPT_ASSERT( cbScratch >= 
cbNPub ); + + // Check if Public key is O + if ( SymCryptEcpointIsZero( pCurve, pEckey->poPublicKey, pbScratch, cbScratch ) ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + // Public key is represented by Modelements of the underlying finite field for the curve + // If we have reached this point we have either: + // Constructed the Public key to have coordinates in the field (Generate case), or + // Verified the Public key has coordinates in the field (SetValue case) + + // Check that Public key is on the curve + // Skip check for Montgomery curves as we do not have an EcpointOnCurve function for them + if ( !SYMCRYPT_CURVE_IS_MONTGOMERY_TYPE(pCurve) && + !SymCryptEcpointOnCurve( pCurve, pEckey->poPublicKey, pbScratch, cbScratch ) ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + // Perform validation that Public key is in a subgroup of order GOrd. + if ( (flags & SYMCRYPT_FLAG_ECKEY_PUBLIC_KEY_ORDER_VALIDATION) != 0 ) + { + if ( SymCryptIntIsEqualUint32( pCurve->H, 1 ) ) + { + // If cofactor is 1 then to validate that Public key has order GOrd + // it is sufficient to validate Public key is on the curve + // We just performed this check - so we are done. + } + else + { + // Ensure GOrd*(Public key) == O + poNPub = SymCryptEcpointCreate( pbScratch, cbNPub, pCurve ); + pbScratch += cbNPub; + cbScratch -= cbNPub; + + SYMCRYPT_ASSERT( poNPub != NULL ); + + // Do the multiplication + scError = SymCryptEcpointScalarMul( + pCurve, + SymCryptIntFromModulus( pCurve->GOrd ), + pEckey->poPublicKey, + 0, // Do not multiply by cofactor! 
+ poNPub, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + return scError; + } + + if ( !SymCryptEcpointIsZero( pCurve, poNPub, pbScratch, cbScratch ) ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + } + } + + return SYMCRYPT_NO_ERROR; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEckeySetValue( + _In_reads_bytes_( cbPrivateKey ) + PCBYTE pbPrivateKey, + SIZE_T cbPrivateKey, + _In_reads_bytes_( cbPublicKey ) + PCBYTE pbPublicKey, + SIZE_T cbPublicKey, + SYMCRYPT_NUMBER_FORMAT numFormat, + SYMCRYPT_ECPOINT_FORMAT ecPointFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_ECKEY pEckey ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + UINT32 cbScratch = 0; + PBYTE pbScratchInternal = NULL; + UINT32 cbScratchInternal = 0; + + PCSYMCRYPT_ECURVE pCurve = pEckey->pCurve; + + PSYMCRYPT_ECPOINT poTmp = NULL; + UINT32 cbTmp = 0; + + PSYMCRYPT_INT piTmpInteger = NULL; + UINT32 cbTmpInteger = 0; + PSYMCRYPT_MODELEMENT peTmpModElement = NULL; + UINT32 cbTmpModElement = pCurve->cbModElement; + + UINT32 privateKeyDigits = SymCryptEcurveDigitsofScalarMultiplier(pCurve); + + UINT32 fValidatePublicKeyOrder = SYMCRYPT_FLAG_ECKEY_PUBLIC_KEY_ORDER_VALIDATION; + + // Ensure caller has specified what algorithm(s) the key will be used with + UINT32 algorithmFlags = SYMCRYPT_FLAG_ECKEY_ECDSA | SYMCRYPT_FLAG_ECKEY_ECDH; + // Make sure only allowed flags are specified + UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_NO_FIPS | SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION | algorithmFlags; + + if ( ( ( flags & ~allowedFlags ) != 0 ) || + ( ( flags & algorithmFlags ) == 0 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Check that minimal validation flag only specified with no fips + if ( ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) && + ( ( flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION ) != 0 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) != 0 ) + { + fValidatePublicKeyOrder 
= 0; + } + + if ( ( ( cbPrivateKey == 0 ) && ( cbPublicKey == 0 ) ) || + ( ( cbPrivateKey != 0 ) && ( cbPrivateKey != SymCryptEcurveSizeofScalarMultiplier( pEckey->pCurve ) ) ) || + ( ( cbPublicKey != 0 ) && ( cbPublicKey != SymCryptEckeySizeofPublicKey( pEckey, ecPointFormat ) ) ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Allocate scratch space + cbScratch = SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_ECKEY_ECURVE_OPERATIONS( pCurve ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if ( pbScratch == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + if ( pbPrivateKey != NULL ) + { + // + // Private key calculations + // + + pbScratchInternal = pbScratch; + cbScratchInternal = cbScratch; + + // Allocate the integer + cbTmpInteger = SymCryptSizeofIntFromDigits( privateKeyDigits ); + piTmpInteger = SymCryptIntCreate( pbScratchInternal, cbTmpInteger, privateKeyDigits ); + SYMCRYPT_ASSERT( piTmpInteger != NULL ); + + pbScratchInternal += cbTmpInteger; + cbScratchInternal -= cbTmpInteger; + + // Allocate the modelement + peTmpModElement = SymCryptModElementCreate( pbScratchInternal, cbTmpModElement, pCurve->GOrd ); + SYMCRYPT_ASSERT( peTmpModElement != NULL ); + + pbScratchInternal += cbTmpModElement; + cbScratchInternal -= cbTmpModElement; + + // Get the "raw" private key + scError = SymCryptIntSetValue( pbPrivateKey, cbPrivateKey, numFormat, piTmpInteger ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Validation steps + if ( ( flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION ) == 0 ) + { + // Perform range validation on imported Private key if it is in canonical format + if ( pCurve->PrivateKeyDefaultFormat == SYMCRYPT_ECKEY_PRIVATE_FORMAT_CANONICAL ) + { + // Check if Private key is greater than or equal to GOrd + if ( !SymCryptIntIsLessThan( piTmpInteger, SymCryptIntFromModulus( pCurve->GOrd ) ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + + // "TimesH" formats + 
// IntGetBits requirements: + // We know that coFactorPower is up to SYMCRYPT_ECURVE_MAX_COFACTOR_POWER. Thus + // less than 32 and less than the digits size in bits. + if ( (pCurve->coFactorPower>0) && + (pCurve->PrivateKeyDefaultFormat == SYMCRYPT_ECKEY_PRIVATE_FORMAT_DIVH_TIMESH) && + (SymCryptIntGetBits( piTmpInteger, 0, pCurve->coFactorPower) != 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + + // High bit restrictions + // IntGetBits requirements: + // Satisfied by asserting that + // HighBitRestrictionPosition + HighBitRestrictionNumOfBits <= GOrdBitsize + coFactorPower + // during EcurveAllocate. + if ( (pCurve->HighBitRestrictionNumOfBits>0) && + (SymCryptIntGetBits( + piTmpInteger, + pCurve->HighBitRestrictionPosition, + pCurve->HighBitRestrictionNumOfBits) != pCurve->HighBitRestrictionValue) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + + // Convert the private key to "DivH" format + if (pCurve->coFactorPower>0) + { + // "TimesH" format: Divide the input private key with the cofactor + // by shifting right the appropriate number of bits + if (pCurve->PrivateKeyDefaultFormat == SYMCRYPT_ECKEY_PRIVATE_FORMAT_DIVH_TIMESH) + { + SymCryptIntDivPow2( piTmpInteger, pCurve->coFactorPower, piTmpInteger ); + } + + // "Canonical" format: Divide by h modulo GOrd + if (pCurve->PrivateKeyDefaultFormat == SYMCRYPT_ECKEY_PRIVATE_FORMAT_CANONICAL) + { + SymCryptIntToModElement( piTmpInteger, pCurve->GOrd, peTmpModElement, pbScratchInternal, cbScratchInternal ); + SymCryptModDivPow2( pCurve->GOrd, peTmpModElement, pCurve->coFactorPower, peTmpModElement, pbScratchInternal, cbScratchInternal ); + SymCryptModElementToInt( pCurve->GOrd, peTmpModElement, piTmpInteger, pbScratchInternal, cbScratchInternal ); + } + } + + // Divide the input private key since it could be larger than subgroup order + SymCryptIntDivMod( + piTmpInteger, + SymCryptDivisorFromModulus(pCurve->GOrd), + NULL, + piTmpInteger, + pbScratchInternal, + 
cbScratchInternal ); + + // Check if Private key is 0 after dividing it by the subgroup order + // Other part of range validation - perform unconditionally as it is cheap + // and it never makes sense for private key to be 0 intentionally + if (SymCryptIntIsEqualUint32( piTmpInteger, 0 )) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Copy into the ECKEY + SymCryptIntCopy( piTmpInteger, pEckey->piPrivateKey ); + + pEckey->hasPrivateKey = TRUE; + } + + if ( pbPublicKey != NULL ) + { + scError = SymCryptEcpointSetValue( + pCurve, + pbPublicKey, + cbPublicKey, + numFormat, + ecPointFormat, + pEckey->poPublicKey, + SYMCRYPT_FLAG_DATA_PUBLIC, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Perform Public key validation on imported Public key. + if ( ( flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION ) == 0 ) + { + scError = SymCryptEckeyPerformPublicKeyValidation( + pEckey, + fValidatePublicKeyOrder, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + } + + // Calculating the public key if no key was provided + // or if needed for keypair regeneration validation + if ( (pbPublicKey==NULL) || + ( ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) && + (pbPrivateKey!=NULL) && (pbPublicKey!=NULL) ) ) + { + // Calculate the public key from the private key + pbScratchInternal = pbScratch; + cbScratchInternal = cbScratch; + + // By default calculate the Public key directly where it will be persisted + poTmp = pEckey->poPublicKey; + + if ( pbPublicKey != NULL ) + { + // If doing regeneration validation calculate the Public key in scratch + cbTmp = SymCryptSizeofEcpointFromCurve( pCurve ); + poTmp = SymCryptEcpointCreate( pbScratchInternal, cbTmp, pCurve ); + pbScratchInternal += cbTmp; + cbScratchInternal -= cbTmp; + } + + SYMCRYPT_ASSERT( poTmp != NULL ); + + // Always multiply by the cofactor since the internal format is "DIVH" + scError = SymCryptEcpointScalarMul( + 
pCurve, + pEckey->piPrivateKey, + NULL, + SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL, + poTmp, + pbScratchInternal, + cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + if ( pbPublicKey != NULL ) + { + if ( !SymCryptEcpointIsEqual( pCurve, poTmp, pEckey->poPublicKey, 0, pbScratchInternal, cbScratchInternal ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + else if ( ( flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION ) == 0 ) + { + // Perform Public key validation on generated Public key. + scError = SymCryptEckeyPerformPublicKeyValidation( + pEckey, + fValidatePublicKeyOrder, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + } + + pEckey->fAlgorithmInfo = flags; // We want to track all of the flags in the Eckey + + if ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) + { + if ( ( flags & SYMCRYPT_FLAG_ECKEY_ECDSA ) != 0 ) + { + // Ensure ECDSA algorithm selftest is run before first use of ECDSA algorithm + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptEcDsaSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_ECDSA ); + + // PCT does not need to be run on import - mark it as done + pEckey->fAlgorithmInfo |= SYMCRYPT_PCT_ECDSA; + } + + if ( ( flags & SYMCRYPT_FLAG_ECKEY_ECDH ) != 0 ) + { + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptEcDhSecretAgreementSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_ECDH ); + } + } + +cleanup: + + if ( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEckeyGetValue( + _In_ PCSYMCRYPT_ECKEY pEckey, + _Out_writes_bytes_( cbPrivateKey ) + PBYTE pbPrivateKey, + SIZE_T cbPrivateKey, + _Out_writes_bytes_( cbPublicKey ) + PBYTE pbPublicKey, + SIZE_T cbPublicKey, + SYMCRYPT_NUMBER_FORMAT numFormat, + SYMCRYPT_ECPOINT_FORMAT ecPointFormat, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + UINT32 cbScratch = 0; + PBYTE 
pbScratchInternal = NULL; + UINT32 cbScratchInternal = 0; + + PCSYMCRYPT_ECURVE pCurve = pEckey->pCurve; + + PSYMCRYPT_INT piTmpInteger = NULL; + UINT32 cbTmpInteger = 0; + PSYMCRYPT_MODELEMENT peTmpModElement = NULL; + UINT32 cbTmpModElement = pCurve->cbModElement; + + UINT32 privateKeyDigits = SymCryptEcurveDigitsofScalarMultiplier(pCurve); + + SYMCRYPT_ASSERT( (cbPrivateKey==0) || (cbPrivateKey == SymCryptEcurveSizeofScalarMultiplier( pEckey->pCurve )) ); + SYMCRYPT_ASSERT( (cbPublicKey==0) || (cbPublicKey == SymCryptEckeySizeofPublicKey( pEckey, ecPointFormat)) ); + + // Make sure we only specify the correct flags + if (flags != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Allocate scratch space + cbScratch = SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_ECKEY_ECURVE_OPERATIONS( pCurve ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if ( pbScratch == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pbScratchInternal = pbScratch; + cbScratchInternal = cbScratch; + + // Allocate the integer + cbTmpInteger = SymCryptSizeofIntFromDigits( privateKeyDigits ); + piTmpInteger = SymCryptIntCreate( pbScratchInternal, cbTmpInteger, privateKeyDigits ); + SYMCRYPT_ASSERT( piTmpInteger != NULL ); + + pbScratchInternal += cbTmpInteger; + cbScratchInternal -= cbTmpInteger; + + // Allocate the modelement + peTmpModElement = SymCryptModElementCreate( pbScratchInternal, cbTmpModElement, pCurve->GOrd ); + SYMCRYPT_ASSERT( peTmpModElement != NULL ); + + pbScratchInternal += cbTmpModElement; + cbScratchInternal -= cbTmpModElement; + + if ((cbPrivateKey == 0) && (cbPublicKey == 0)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (cbPrivateKey != 0) + { + if (!pEckey->hasPrivateKey) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + // If this keypair may be used in ECDSA, and does not have the no FIPS flag, run the PCT if + // it has not already been run + if ( ((pEckey->fAlgorithmInfo & 
SYMCRYPT_FLAG_ECKEY_ECDSA) != 0) && + ((pEckey->fAlgorithmInfo & SYMCRYPT_FLAG_KEY_NO_FIPS) == 0) ) + { + SYMCRYPT_RUN_KEY_GEN_PCT( + SymCryptEcDsaPct, + pEckey, + SYMCRYPT_PCT_ECDSA ); + } + + // Copy the key into the temporary integer + SymCryptIntCopy( pEckey->piPrivateKey, piTmpInteger ); + + // Convert the "DivH" format into the external format + if (pCurve->coFactorPower>0) + { + // For the "Canonical" format: Multiply the integer by h + // and then take the result modulo GOrd + if (pCurve->PrivateKeyDefaultFormat == SYMCRYPT_ECKEY_PRIVATE_FORMAT_CANONICAL) + { + SymCryptIntMulPow2( piTmpInteger, pCurve->coFactorPower, piTmpInteger ); + SymCryptIntDivMod( + piTmpInteger, + SymCryptDivisorFromModulus(pCurve->GOrd), + NULL, + piTmpInteger, + pbScratchInternal, + cbScratchInternal ); + } + + // For the "TimesH" format: Multiply the integer by h again by shifting + if (pCurve->PrivateKeyDefaultFormat == SYMCRYPT_ECKEY_PRIVATE_FORMAT_DIVH_TIMESH) + { + SymCryptIntMulPow2( piTmpInteger, pCurve->coFactorPower, piTmpInteger ); + } + } + + scError = SymCryptIntGetValue( piTmpInteger, pbPrivateKey, cbPrivateKey, numFormat ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + + if (cbPublicKey != 0) + { + scError = SymCryptEcpointGetValue( + pCurve, + pEckey->poPublicKey, + numFormat, + ecPointFormat, + pbPublicKey, + cbPublicKey, + SYMCRYPT_FLAG_DATA_PUBLIC, + pbScratch, + cbScratch ); + } + +cleanup: + + if ( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + + return scError; +} + +#define SYMCRYPT_ECPOINT_SET_RANDOM_MAX_TRIES (1000) + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEckeySetRandom( + _In_ UINT32 flags, + _Inout_ PSYMCRYPT_ECKEY pEckey ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + UINT32 cbScratch = 0; + PBYTE pbScratchInternal = NULL; + UINT32 cbScratchInternal = 0; + + PCSYMCRYPT_ECURVE pCurve = pEckey->pCurve; + + PSYMCRYPT_ECPOINT poTmp = NULL; + UINT32 
cbTmp = 0; + + INT32 cntr = SYMCRYPT_ECPOINT_SET_RANDOM_MAX_TRIES; + + PSYMCRYPT_MODELEMENT peScalar = NULL; + PSYMCRYPT_INT piScalar = NULL; + UINT32 cbScalar = 0; + + UINT32 highBitRestrictionPosition = pCurve->HighBitRestrictionPosition; + + // Ensure caller has specified what algorithm(s) the key will be used with + UINT32 algorithmFlags = SYMCRYPT_FLAG_ECKEY_ECDSA | SYMCRYPT_FLAG_ECKEY_ECDH; + // Make sure only allowed flags are specified + UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_NO_FIPS | algorithmFlags; + + if ( ( ( flags & ~allowedFlags ) != 0 ) || + ( ( flags & algorithmFlags ) == 0 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // Thus the following calculation does not overflow cbScratch. + // + cbScratch = SYMCRYPT_INTERNAL_SCRATCH_BYTES_FOR_ECKEY_ECURVE_OPERATIONS( pCurve ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + if ( pbScratch == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Allocating temporaries + pbScratchInternal = pbScratch; + cbScratchInternal = cbScratch; + + peScalar = SymCryptModElementCreate( pbScratchInternal, pCurve->cbModElement, pCurve->GOrd ); + SYMCRYPT_ASSERT( peScalar != NULL ); + + pbScratchInternal += pCurve->cbModElement; + cbScratchInternal -= pCurve->cbModElement; + + cbScalar = SymCryptSizeofIntFromDigits( SymCryptEcurveDigitsofScalarMultiplier(pCurve) ); + piScalar = SymCryptIntCreate( pbScratchInternal, cbScalar, SymCryptEcurveDigitsofScalarMultiplier(pCurve) ); + + pbScratchInternal += cbScalar; + cbScratchInternal -= cbScalar; + + // Shift the high bit position if the format is "TIMESH" + // Note: Do not actually multiply the integer as we will check if it is + // less than the group order + if (pCurve->PrivateKeyDefaultFormat == 
SYMCRYPT_ECKEY_PRIVATE_FORMAT_DIVH_TIMESH) + { + highBitRestrictionPosition -= pCurve->coFactorPower; + } + + // Main loop + do + { + // We perform Private key range validation by construction + // Setting a random mod element in the [1, SubgroupOrder-1] set + // This will be the "DivH" format of the private key. This means + // that PublicKey = h * PrivateKey * G + SymCryptModSetRandom( + pCurve->GOrd, + peScalar, + (SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE|SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE), + pbScratchInternal, + cbScratchInternal ); + + // Converting to "canonical" format + if (pCurve->PrivateKeyDefaultFormat == SYMCRYPT_ECKEY_PRIVATE_FORMAT_CANONICAL) + { + for (UINT32 i=0; i<pCurve->coFactorPower; i++) + { + SymCryptModAdd( pCurve->GOrd, peScalar, peScalar, peScalar, pbScratchInternal, cbScratchInternal ); + } + } + + // Set the temporary scalar to verify the format + SymCryptModElementToInt( pCurve->GOrd, peScalar, piScalar, pbScratchInternal, cbScratchInternal ); + + if (pCurve->HighBitRestrictionNumOfBits > 0) + { + // Set the desired bits + SymCryptIntSetBits( + piScalar, + pCurve->HighBitRestrictionValue, + highBitRestrictionPosition, + pCurve->HighBitRestrictionNumOfBits ); + + // Make sure we didn't exceed the group order + if ( SymCryptIntIsLessThan( + piScalar, + SymCryptIntFromModulus( pCurve->GOrd )) ) + { + break; + } + } + else + { + // No high bit restriction was specified + break; + } + + cntr--; + } + while (cntr>0); + + if (cntr <= 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Here piScalar has a private key that satisfies the restriction(s) + // Move it to the modelement + SymCryptIntToModElement( piScalar, pCurve->GOrd, peScalar, pbScratchInternal, cbScratchInternal ); + + // Convert the private key back to "DIVH" format + if (pCurve->PrivateKeyDefaultFormat == SYMCRYPT_ECKEY_PRIVATE_FORMAT_CANONICAL) + { + SymCryptModDivPow2( pCurve->GOrd, peScalar, pCurve->coFactorPower, peScalar, pbScratchInternal, 
cbScratchInternal ); + } + + // Set the private key + SymCryptModElementToInt( pCurve->GOrd, peScalar, pEckey->piPrivateKey, pbScratchInternal, cbScratchInternal ); + + // Do the multiplication (pass over the entire scratch space as it is not needed anymore) + scError = SymCryptEcpointScalarMul( + pCurve, + pEckey->piPrivateKey, + NULL, + SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL, + pEckey->poPublicKey, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Perform range and public key order validation on generated Public key. + if ( (flags & SYMCRYPT_FLAG_KEY_NO_FIPS) == 0 ) + { + // Perform Public key validation. + // Always perform range validation and validation that Public key is in subgroup of order GOrd + scError = SymCryptEckeyPerformPublicKeyValidation( + pEckey, + SYMCRYPT_FLAG_ECKEY_PUBLIC_KEY_ORDER_VALIDATION, + pbScratch, + cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + pEckey->hasPrivateKey = TRUE; + + pEckey->fAlgorithmInfo = flags; // We want to track all of the flags in the Eckey + + if ( (flags & SYMCRYPT_FLAG_KEY_NO_FIPS) == 0 ) + { + if( ( flags & SYMCRYPT_FLAG_ECKEY_ECDSA ) != 0 ) + { + // Ensure ECDSA algorithm selftest is run before first use of ECDSA algorithm + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptEcDsaSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_ECDSA ); + } + + if( ( flags & SYMCRYPT_FLAG_ECKEY_ECDH ) != 0 ) + { + // Ensure we have run the algorithm selftest at least once. 
+ SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptEcDhSecretAgreementSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_ECDH ); + + // Run PCT eagerly so it only needs to be defined here + // The important case for performance is ECDH key generation + + // ECDH PCT per SP80056a-rev3 5.6.2.1.4 b) + // Recompute the public key from the private key + // Option a) appears to be explicitly overruled by 140-3 IG + pbScratchInternal = pbScratch; + cbScratchInternal = cbScratch; + + cbTmp = SymCryptSizeofEcpointFromCurve( pCurve ); + poTmp = SymCryptEcpointCreate( pbScratchInternal, cbTmp, pCurve ); + pbScratchInternal += cbTmp; + cbScratchInternal -= cbTmp; + + SYMCRYPT_ASSERT( poTmp != NULL ); + + // Always multiply by the cofactor since the internal format is "DIVH" + scError = SymCryptEcpointScalarMul( + pCurve, + pEckey->piPrivateKey, + NULL, + SYMCRYPT_FLAG_ECC_LL_COFACTOR_MUL, + poTmp, + pbScratchInternal, + cbScratchInternal ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SYMCRYPT_FIPS_ASSERT( SymCryptEcpointIsEqual( pCurve, poTmp, pEckey->poPublicKey, 0, pbScratchInternal, cbScratchInternal ) ); + } + } + +cleanup: + + if ( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEckeyExtendKeyUsage( + _Inout_ PSYMCRYPT_ECKEY pEckey, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + // Ensure caller has specified what algorithm(s) the key will be used with + UINT32 algorithmFlags = SYMCRYPT_FLAG_ECKEY_ECDSA | SYMCRYPT_FLAG_ECKEY_ECDH; + + if ( ( ( flags & ~algorithmFlags ) != 0 ) || + ( ( flags & algorithmFlags ) == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pEckey->fAlgorithmInfo |= flags; + +cleanup: + return scError; +} diff --git a/libs/symcrypt/lib/ecpoint.c b/libs/symcrypt/lib/ecpoint.c new file mode 100644 index 00000000000..d2f36fa57e5 --- /dev/null +++ b/libs/symcrypt/lib/ecpoint.c @@ -0,0 +1,785 
@@ +// +// ecpoint.c Ecpoint functions +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// + +#include "precomp.h" + +// Table with the number of field elements for each point format +const UINT32 SymCryptEcpointFormatNumberofElements[] = { + 0, + 1, // SYMCRYPT_ECPOINT_FORMAT_X + 2, // SYMCRYPT_ECPOINT_FORMAT_XY +}; + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofEcpointEx( + UINT32 cbModElement, + UINT32 numOfCoordinates ) +{ + SYMCRYPT_ASSERT(numOfCoordinates > 0); + SYMCRYPT_ASSERT(numOfCoordinates <= SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH); + + // Callers should never specify numOfCoordinates equal to 0 or greater than + // SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH + // Return 0 to indicate failure if a caller does specify invalid numOfCoordinates + if( (numOfCoordinates == 0) || (numOfCoordinates > SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH) ) + { + return 0; + } + + // Since the maximum number of coordinates is 4 this result is bounded + // by 4*2^17 + overhead ~ 2^20 + return sizeof(SYMCRYPT_ECPOINT) + numOfCoordinates * cbModElement; +} + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofEcpointFromCurve( PCSYMCRYPT_ECURVE pCurve ) +{ + // Same bound as SymCryptSizeofEcpointEx + return SymCryptSizeofEcpointEx( pCurve->cbModElement, SYMCRYPT_INTERNAL_NUMOF_COORDINATES(pCurve->eCoordinates) ); +} + +PSYMCRYPT_ECPOINT +SYMCRYPT_CALL +SymCryptEcpointAllocate( _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + PVOID p = NULL; + SIZE_T cb; + PSYMCRYPT_ECPOINT res = NULL; + + cb = SymCryptSizeofEcpointFromCurve( pCurve ); + + if ( cb != 0 ) + { + p = SymCryptCallbackAlloc( cb ); + } + + if ( p==NULL ) + { + goto cleanup; + } + + res = SymCryptEcpointCreate( p, cb, pCurve ); + +cleanup: + return res; +} + +VOID +SYMCRYPT_CALL +SymCryptEcpointFree( + _In_ PCSYMCRYPT_ECURVE pCurve, + _Out_ PSYMCRYPT_ECPOINT poDst ) +{ + SYMCRYPT_CHECK_MAGIC( poDst ); + SymCryptEcpointWipe( pCurve, poDst ); + SymCryptCallbackFree( poDst ); +} + +PSYMCRYPT_ECPOINT +SYMCRYPT_CALL +SymCryptEcpointCreateEx( 
+ _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + PCSYMCRYPT_ECURVE pCurve, + UINT32 numOfCoordinates ) +{ + PSYMCRYPT_ECPOINT poPoint = NULL; + + PSYMCRYPT_MODELEMENT pmTmp = NULL; + UINT32 cbModElement = pCurve->cbModElement; + + PBYTE pbBufferEnd = pbBuffer + cbBuffer; + UNREFERENCED_PARAMETER( pbBufferEnd ); // only referenced in an ASSERT... + + SYMCRYPT_ASSERT( pCurve->FMod != 0 ); + SYMCRYPT_ASSERT( pCurve->cbModElement != 0 ); + SYMCRYPT_ASSERT( cbBuffer >= SymCryptSizeofEcpointEx( pCurve->cbModElement, numOfCoordinates ) ); + if ( cbBuffer == 0 || numOfCoordinates == 0 ) + { + goto cleanup; + } + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + + poPoint = (PSYMCRYPT_ECPOINT) pbBuffer; + + pbBuffer += sizeof(SYMCRYPT_ECPOINT); + + // Setting the point coordinates + for (UINT32 i=0; i<numOfCoordinates; i++) + { + SYMCRYPT_ASSERT( pbBuffer + cbModElement <= pbBufferEnd ); + pmTmp = SymCryptModElementCreate( pbBuffer, cbModElement, pCurve->FMod ); + if ( pmTmp == NULL ) + { + poPoint = NULL; + goto cleanup; + } + pbBuffer += cbModElement; + } + + // Setting the normalized flag + poPoint->normalized = FALSE; + + // Setting the curve + poPoint->pCurve = pCurve; + + // Setting the magic + SYMCRYPT_SET_MAGIC( poPoint ); + +cleanup: + return poPoint; +} + +PSYMCRYPT_ECPOINT +SYMCRYPT_CALL +SymCryptEcpointCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + + SYMCRYPT_ASSERT( pCurve->eCoordinates != 0 ); + + return SymCryptEcpointCreateEx( pbBuffer, cbBuffer, pCurve, SYMCRYPT_INTERNAL_NUMOF_COORDINATES(pCurve->eCoordinates) ); +} + +PSYMCRYPT_ECPOINT +SYMCRYPT_CALL +SymCryptEcpointRetrieveHandle( _In_ PBYTE pbBuffer ) +{ + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + + return (PSYMCRYPT_ECPOINT) pbBuffer; +} + +VOID +SYMCRYPT_CALL +SymCryptEcpointWipe( _In_ PCSYMCRYPT_ECURVE pCurve, _Out_ PSYMCRYPT_ECPOINT poDst ) +{ + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + + // 
Wipe the whole structure in one go. + SymCryptWipe( poDst, SymCryptSizeofEcpointFromCurve( pCurve ) ); +} + +VOID +SymCryptEcpointCopy( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_ PSYMCRYPT_ECPOINT poDst ) +{ + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + + if( poSrc != poDst ) + { + // Unconditionally set the normalization state of destination to source + poDst->normalized = poSrc->normalized; + + memcpy(poDst + 1, poSrc + 1, SYMCRYPT_INTERNAL_NUMOF_COORDINATES(pCurve->eCoordinates) * pCurve->FModDigits * SYMCRYPT_FDEF_DIGIT_SIZE); + } +} + +VOID +SymCryptEcpointMaskedCopy( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 mask ) +{ + SYMCRYPT_ASSERT( (mask == 0) || (mask == 0xffffffff) ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + + // Unconditionally combine the normalization state of source and destination to avoid potential for + // leak of mask. Normalized is a non-secret value and is permitted to be leaked by side-channels + poDst->normalized &= poSrc->normalized; + + // dcl - this looks like the equivalent of memcpy + // should be proven that arguments cannot be the result of an integer overflow + SymCryptFdefMaskedCopy((PCBYTE)poSrc + sizeof(SYMCRYPT_ECPOINT), (PBYTE)poDst + sizeof(SYMCRYPT_ECPOINT), SYMCRYPT_INTERNAL_NUMOF_COORDINATES(pCurve->eCoordinates) * pCurve->FModDigits, mask ); +} + +// +// SymCryptEcpointTransform: Internal function to transform an ECPOINT +// from one coordinate representation to another. One point has the default +// format of the curve. The other point has a format large enough for the external +// SYMCRYPT_ECPOINT_FORMAT. +// +// When the boolean setValue is set to TRUE, the source point is the one with +// the external format eformat, and the destination point has the default +// format of the curve. 
If setValue = FALSE the roles are reversed. +// This function is only called by the Get / Set Value functions. +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointTransform( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + _Out_ PSYMCRYPT_ECPOINT poDst, + SYMCRYPT_ECPOINT_FORMAT eformat, + BOOLEAN setValue, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PSYMCRYPT_MODELEMENT peSrc = NULL; + PSYMCRYPT_MODELEMENT peDst = NULL; + PSYMCRYPT_MODELEMENT peX = NULL; + PSYMCRYPT_MODELEMENT peY = NULL; + + SYMCRYPT_ECPOINT_COORDINATES coFrom = SYMCRYPT_ECPOINT_COORDINATES_INVALID; + SYMCRYPT_ECPOINT_COORDINATES coTo = SYMCRYPT_ECPOINT_COORDINATES_INVALID; + + PSYMCRYPT_MODELEMENT peT[2] = { 0 }; // Temporaries + + SYMCRYPT_ASSERT( (flags & ~SYMCRYPT_FLAG_DATA_PUBLIC) == 0 ); + SYMCRYPT_ASSERT( SymCryptEcurveIsSame(pCurve, poSrc->pCurve) && SymCryptEcurveIsSame(pCurve, poDst->pCurve) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pCurve->FModDigits ), + SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( pCurve->FModDigits )) + + 2 * pCurve->cbModElement ); + + // Get the assumed representation from the external format + switch (eformat) + { + case (SYMCRYPT_ECPOINT_FORMAT_X): + coFrom = SYMCRYPT_ECPOINT_COORDINATES_SINGLE; + break; + case (SYMCRYPT_ECPOINT_FORMAT_XY): + coFrom = SYMCRYPT_ECPOINT_COORDINATES_AFFINE; + break; + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Find out whether we are setting or getting the value of the ECPOINT + if (setValue) + { + coTo = pCurve->eCoordinates; + } + else + { + coTo = coFrom; + coFrom = pCurve->eCoordinates; + } + + // Take all the possible supported transformations: + // - From SYMCRYPT_ECPOINT_COORDINATES_SINGLE to + // * SYMCRYPT_ECPOINT_COORDINATES_SINGLE (identity transformation) + // * SYMCRYPT_ECPOINT_COORDINATES_AFFINE (** Set all zeros to the Y 
coordinate **) + // * SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE + // - From SYMCRYPT_ECPOINT_COORDINATES_AFFINE to + // * SYMCRYPT_ECPOINT_COORDINATES_SINGLE (** Ignore Y coordinate **) + // * SYMCRYPT_ECPOINT_COORDINATES_AFFINE (identity transformation) + // * SYMCRYPT_ECPOINT_COORDINATES_JACOBIAN + // * SYMCRYPT_ECPOINT_COORDINATES_EXTENDED_PROJECTIVE + // * SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE (** Ignore Y coordinate **) + // - From SYMCRYPT_ECPOINT_COORDINATES_JACOBIAN to + // * SYMCRYPT_ECPOINT_COORDINATES_SINGLE + // * SYMCRYPT_ECPOINT_COORDINATES_AFFINE + // * SYMCRYPT_ECPOINT_COORDINATES_JACOBIAN (identity transformation) + // - From SYMCRYPT_ECPOINT_COORDINATES_EXTENDED_PROJECTIVE to + // * SYMCRYPT_ECPOINT_COORDINATES_SINGLE + // * SYMCRYPT_ECPOINT_COORDINATES_AFFINE + // * SYMCRYPT_ECPOINT_COORDINATES_EXTENDED_PROJECTIVE (identity transformation) + // - From SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE + // * SYMCRYPT_ECPOINT_COORDINATES_SINGLE + // * SYMCRYPT_ECPOINT_COORDINATES_AFFINE (** Set all zeros to the Y coordinate **) + // * SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE (identity transformation) + + // dcl - this appears that it might be a candidate for refactoring. Lots of code that looks + // duplicated across sections. Maybe some number of small functions would make it less fragile? + if ( coFrom == coTo ) + { + SymCryptEcpointCopy( pCurve, poSrc, poDst ); // All the identity transformations. 
+ } + else if (coFrom == SYMCRYPT_ECPOINT_COORDINATES_SINGLE) + { + if (coTo == SYMCRYPT_ECPOINT_COORDINATES_AFFINE) + { + // Copy X + peX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + SYMCRYPT_ASSERT( peX != NULL ); + + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModElementCopy( pCurve->FMod, peX, peDst ); + + // Set Y to 0 + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModElementSetValueUint32( 0, pCurve->FMod, peDst, pbScratch, cbScratch ); + } + else if (coTo == SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE) + { + // Copy X + peX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + SYMCRYPT_ASSERT( peX != NULL ); + + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModElementCopy( pCurve->FMod, peX, peDst ); + + // Set Y to 1 + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModElementSetValueUint32( 1, pCurve->FMod, peDst, pbScratch, cbScratch ); + + // Setting the normalized flag + poDst->normalized = TRUE; + } + else + { + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + } + else if (coFrom == SYMCRYPT_ECPOINT_COORDINATES_AFFINE) + { + if ( (coTo == SYMCRYPT_ECPOINT_COORDINATES_SINGLE) || + (coTo == SYMCRYPT_ECPOINT_COORDINATES_JACOBIAN) || + (coTo == SYMCRYPT_ECPOINT_COORDINATES_EXTENDED_PROJECTIVE) || + (coTo == SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE) + ) + { + // Copy X + peX = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + SYMCRYPT_ASSERT( peX != NULL ); + + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModElementCopy( pCurve->FMod, peX, peDst ); + + if ( (coTo == SYMCRYPT_ECPOINT_COORDINATES_JACOBIAN) || + (coTo == SYMCRYPT_ECPOINT_COORDINATES_EXTENDED_PROJECTIVE) ) 
+ { + // Copy Y + peY = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc ); + SYMCRYPT_ASSERT( peY != NULL ); + + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModElementCopy( pCurve->FMod, peY, peDst ); + + // Set Z to 1 + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModElementSetValueUint32( 1, pCurve->FMod, peDst, pbScratch, cbScratch ); + + if (coTo == SYMCRYPT_ECPOINT_COORDINATES_EXTENDED_PROJECTIVE) + { + // T = x * y * z + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 3, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModMul( pCurve->FMod, peX, peY, peDst, pbScratch, cbScratch ); + } + + // Setting the normalized flag + poDst->normalized = TRUE; + } + else if (coTo == SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE) + { + // Set Y to 1 (Ignore the second coordinate of the source point) + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModElementSetValueUint32( 1, pCurve->FMod, peDst, pbScratch, cbScratch ); + + // Setting the normalized flag + poDst->normalized = TRUE; + } + } + else + { + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + } + else if (coFrom == SYMCRYPT_ECPOINT_COORDINATES_JACOBIAN) + { + if ( (coTo == SYMCRYPT_ECPOINT_COORDINATES_SINGLE) || + (coTo == SYMCRYPT_ECPOINT_COORDINATES_AFFINE) ) + { + // Creating temporaries + for (UINT32 i=0; i<2; i++) + { + peT[i] = SymCryptModElementCreate( pbScratch, pCurve->cbModElement, pCurve->FMod ); + SYMCRYPT_ASSERT( peT[i] != NULL); + + pbScratch += pCurve->cbModElement; + } + + cbScratch -= 2*pCurve->cbModElement; + + // Get the Z coordinate of the source point + peSrc = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc ); + SYMCRYPT_ASSERT( peSrc != NULL ); + + // Check if Z is equal to 0 (i.e. 
the point is the point at infinity) + if (SymCryptModElementIsZero(pCurve->FMod, peSrc)) + { + scError = SYMCRYPT_INCOMPATIBLE_FORMAT; + goto cleanup; + } + + // Calculation + // T0 := 1 / Z + scError = SymCryptModInv( pCurve->FMod, peSrc, peT[0], flags, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptModMul( pCurve->FMod, peT[0], peT[0], peT[1], pbScratch, cbScratch ); // T1 := T0 * T0 = 1/Z^2 + + // Get the X coordinates + peSrc = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + SYMCRYPT_ASSERT( peSrc != NULL ); + + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + // Set the new X + SymCryptModMul( pCurve->FMod, peSrc, peT[1], peDst, pbScratch, cbScratch ); // X2 := X * T1 = X/Z^2 + + if (coTo == SYMCRYPT_ECPOINT_COORDINATES_AFFINE) + { + SymCryptModMul( pCurve->FMod, peT[0], peT[1], peT[1], pbScratch, cbScratch ); // T1 := T0 * T1 = 1/Z^3 + + // Get the Y coordinates + peSrc = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc ); + SYMCRYPT_ASSERT( peSrc != NULL ); + + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + // Set the new Y + SymCryptModMul( pCurve->FMod, peSrc, peT[1], peDst, pbScratch, cbScratch ); // Y2 := Y * T1 = Y/Z^3 + } + } + else + { + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + } + else if ( coFrom == SYMCRYPT_ECPOINT_COORDINATES_EXTENDED_PROJECTIVE ) + { + + if ( (coTo == SYMCRYPT_ECPOINT_COORDINATES_SINGLE) || + (coTo == SYMCRYPT_ECPOINT_COORDINATES_AFFINE) ) + { + // Creating temporary + peT[0] = SymCryptModElementCreate( pbScratch, pCurve->cbModElement, pCurve->FMod ); + SYMCRYPT_ASSERT( peT[0] != NULL); + pbScratch += pCurve->cbModElement; + cbScratch -= 2*pCurve->cbModElement; + + // Get the Z coordinate of the source point + peSrc = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 2, pCurve, poSrc ); + SYMCRYPT_ASSERT( peSrc != NULL ); + + // Check if Z is 
equal to 0 (i.e. the point is the point at infinity) + if (SymCryptModElementIsZero(pCurve->FMod, peSrc)) + { + scError = SYMCRYPT_INCOMPATIBLE_FORMAT; + goto cleanup; + } + + // peT[0] = 1 / Z + scError = SymCryptModInv( pCurve->FMod, peSrc, peT[0], flags, pbScratch, cbScratch ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Get the X coordinates + peSrc = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + SYMCRYPT_ASSERT( peSrc != NULL ); + + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + // x = X * (1 / Z) + SymCryptModMul( pCurve->FMod, peSrc, peT[0], peDst, pbScratch, cbScratch ); + + if (coTo == SYMCRYPT_ECPOINT_COORDINATES_AFFINE) + { + // Get the Y coordinates + peSrc = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc ); + SYMCRYPT_ASSERT( peSrc != NULL ); + + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + // y = Y * (1 / Z) + SymCryptModMul( pCurve->FMod, peSrc, peT[0], peDst, pbScratch, cbScratch ); + } + } + else + { + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + } + else if (coFrom == SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE) + { + if ( (coTo == SYMCRYPT_ECPOINT_COORDINATES_SINGLE) || + (coTo == SYMCRYPT_ECPOINT_COORDINATES_AFFINE) ) + { + // Creating temporary + peT[0] = SymCryptModElementCreate( pbScratch, pCurve->cbModElement, pCurve->FMod ); + SYMCRYPT_ASSERT( peT[0] != NULL); + + pbScratch += pCurve->cbModElement; + cbScratch -= pCurve->cbModElement; + + // Get the Y coordinate of the source point + peSrc = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poSrc ); + SYMCRYPT_ASSERT( peSrc != NULL ); + + // Check if Y is equal to 0 (i.e. 
the point is the point at infinity) + if (SymCryptModElementIsZero(pCurve->FMod, peSrc)) + { + scError = SYMCRYPT_INCOMPATIBLE_FORMAT; + goto cleanup; + } + + // Calculation + scError = SymCryptModInv( pCurve->FMod, peSrc, peT[0], flags, pbScratch, cbScratch ); // T0 := 1 / Y + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Get the X coordinates + peSrc = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poSrc ); + SYMCRYPT_ASSERT( peSrc != NULL ); + + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 0, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + // Set the new X + SymCryptModMul( pCurve->FMod, peSrc, peT[0], peDst, pbScratch, cbScratch ); // X2 := X * T0 = X/Y + + if (coTo == SYMCRYPT_ECPOINT_COORDINATES_AFFINE) + { + // Set Y to 0 + peDst = SYMCRYPT_INTERNAL_ECPOINT_COORDINATE( 1, pCurve, poDst ); + SYMCRYPT_ASSERT( peDst != NULL ); + + SymCryptModElementSetValueUint32( 0, pCurve->FMod, peDst, pbScratch, cbScratch ); + } + } + } + else + { + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + +cleanup: + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointSetValue( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT nformat, + SYMCRYPT_ECPOINT_FORMAT eformat, + _Out_ PSYMCRYPT_ECPOINT poDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NOT_IMPLEMENTED; + PSYMCRYPT_MODELEMENT peTmp = NULL; // Temporary MODELEMENT handle + PSYMCRYPT_ECPOINT poLarge = NULL; // ECPOINT with the largest format available + UINT32 cbLarge = 0; + PSYMCRYPT_INT piTemp = NULL; + UINT32 cbTemp = 0; + UINT32 publicKeyDigits = SymCryptEcurveDigitsofFieldElement( pCurve ); + + SYMCRYPT_ASSERT( (flags & ~SYMCRYPT_FLAG_DATA_PUBLIC) == 0 ); + + SYMCRYPT_ASSERT( pCurve->FMod != 0 ); + SYMCRYPT_ASSERT( pCurve->eCoordinates != 0 ); + SYMCRYPT_ASSERT( pCurve->cbModElement != 0 ); + + SYMCRYPT_ASSERT( 
cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) ); + + // Check that the buffer is of correct size + if ( cbSrc != SymCryptEcpointFormatNumberofElements[ eformat ] * SymCryptEcurveSizeofFieldElement( pCurve ) ) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + cbSrc = cbSrc / SymCryptEcpointFormatNumberofElements[ eformat ]; + + cbTemp = SymCryptSizeofIntFromDigits( publicKeyDigits ); + SYMCRYPT_ASSERT( cbScratch > cbTemp ); + + piTemp = SymCryptIntCreate( pbScratch, cbTemp, publicKeyDigits ); + + // Validate the coordinate of the input public key is less than the field modulus + for ( UINT32 i = 0; i < SymCryptEcpointFormatNumberofElements[eformat]; i++ ) + { + scError = SymCryptIntSetValue( pbSrc + i * cbSrc, cbSrc, nformat, piTemp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + if ( !SymCryptIntIsLessThan( piTemp, SymCryptIntFromModulus( pCurve->FMod ) ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + + // Create the large point + cbLarge = SymCryptSizeofEcpointEx( pCurve->cbModElement, SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH ); + SYMCRYPT_ASSERT( cbScratch > cbLarge ); + poLarge = SymCryptEcpointCreateEx( pbScratch, cbLarge, pCurve, SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH ); + if ( poLarge == NULL ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + // Setting the point coordinates into the big point + for (UINT32 i=0; i<SymCryptEcpointFormatNumberofElements[eformat]; i++) + { + peTmp = (PSYMCRYPT_MODELEMENT)((PBYTE)poLarge + SYMCRYPT_INTERNAL_ECPOINT_COORDINATE_OFFSET( pCurve, i )); + if ( peTmp == NULL ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + scError = SymCryptModElementSetValue( + pbSrc, + cbSrc, + nformat, + pCurve->FMod, + peTmp, + pbScratch + cbLarge, + cbScratch - cbLarge ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + pbSrc += cbSrc; + } + + // Transform the big point into the destination point + scError = 
SymCryptEcpointTransform( pCurve, poLarge, poDst, eformat, TRUE, flags, pbScratch + cbLarge, cbScratch - cbLarge); + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcpointGetValue( + _In_ PCSYMCRYPT_ECURVE pCurve, + _In_ PCSYMCRYPT_ECPOINT poSrc, + SYMCRYPT_NUMBER_FORMAT nformat, + SYMCRYPT_ECPOINT_FORMAT eformat, + _Out_writes_bytes_(cbDst) PBYTE pbDst, + SIZE_T cbDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NOT_IMPLEMENTED; + PSYMCRYPT_MODELEMENT peTmp = NULL; // Temporary MODELEMENT handle + PSYMCRYPT_ECPOINT poLarge = NULL; // ECPOINT with the largest format available + UINT32 cbLarge = 0; + SIZE_T cbDstElem; + + SYMCRYPT_ASSERT( (flags & ~SYMCRYPT_FLAG_DATA_PUBLIC) == 0 ); + SYMCRYPT_ASSERT( pCurve->FMod != 0 ); + SYMCRYPT_ASSERT( pCurve->eCoordinates != 0 ); + SYMCRYPT_ASSERT( pCurve->cbModElement != 0 ); + + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_GETSET_VALUE_ECURVE_OPERATIONS( pCurve ) ); + + // Check that the buffer is of correct size + if ( cbDst != SymCryptEcpointFormatNumberofElements[ eformat ] * SymCryptEcurveSizeofFieldElement( pCurve ) ) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + SYMCRYPT_ASSERT( SymCryptEcpointFormatNumberofElements[ eformat ] > 0 ); + cbDstElem = cbDst / SymCryptEcpointFormatNumberofElements[ eformat ]; + + // Create the big point + cbLarge = SymCryptSizeofEcpointEx( pCurve->cbModElement, SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH ); + SYMCRYPT_ASSERT( cbScratch > cbLarge ); + poLarge = SymCryptEcpointCreateEx( pbScratch, cbLarge, pCurve, SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH ); + if ( poLarge == NULL ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + // Transform the source point into the big point if needed + scError = SymCryptEcpointTransform( pCurve, poSrc, poLarge, eformat, FALSE, flags, pbScratch + cbLarge, cbScratch - cbLarge); + if (scError != SYMCRYPT_NO_ERROR) + { + 
goto cleanup; + } + + // Getting the point coordinates into the destination buffer + for (UINT32 i=0; i<SymCryptEcpointFormatNumberofElements[eformat]; i++) + { + SYMCRYPT_ASSERT( cbDst >= cbDstElem ); + peTmp = (PSYMCRYPT_MODELEMENT)( (PBYTE)poLarge + SYMCRYPT_INTERNAL_ECPOINT_COORDINATE_OFFSET( pCurve, i ) ); + if ( peTmp == NULL ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + scError = SymCryptModElementGetValue( + pCurve->FMod, + peTmp, + pbDst, + cbDstElem, + nformat, + pbScratch + cbLarge, + cbScratch - cbLarge ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + pbDst += cbDstElem; + cbDst -= cbDstElem; + } + +cleanup: + + return scError; +} diff --git a/libs/symcrypt/lib/ecurve.c b/libs/symcrypt/lib/ecurve.c new file mode 100644 index 00000000000..7e5cc37f313 --- /dev/null +++ b/libs/symcrypt/lib/ecurve.c @@ -0,0 +1,771 @@ +// +// ecurve.c Ecurve functions +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// + +#include "precomp.h" + +// Approximate number of consecutive operations with the modulus and the +// (sub)group order of the curve. These numbers can trigger special optimizations +// on the underlying code, e.g. use of Montgomery multiplication or not. +#define SYMCRYPT_INTERNAL_ECURVE_MODULUS_NUMOF_OPERATIONS( _bitsize ) ( 100 * (_bitsize) ) +#define SYMCRYPT_INTERNAL_ECURVE_GROUP_ORDER_NUMOF_OPERATIONS ( 1 ) + +// We limit the max size of the elliptic curve to avoid denial-of-service attacks when +// an attacker sends a curve specification. +// Elliptic curve operations are O(n^3) in the curve size. Theoretically SymCrypt supports +// values up to 2^20 bits at the moment, so that is 2^12 times more than a typical curve size +// of 256 bits. Operations are then 2^36 times slower, and a single operation could take months. +// Our largest curve is 521 bits, and we won't see curves > 1024 bits for a while yet. 
+#define SYMCRYPT_INTERNAL_MAX_ECURVE_SIZE (1024) + +// Private struct which records the sizes of various different parts of the elliptic curve +// structure. +typedef struct _SYMCRYPT_ECURVE_SIZES { + UINT32 nDigitsFieldLength; + UINT32 nDigitsSubgroupOrder; + UINT32 nDigitsCoFactor; + UINT32 cbAlloc; // Length of the whole curve buffer + UINT32 cbModulus; + UINT32 cbModElement; + UINT32 cbEcpoint; + UINT32 cbSubgroupOrder; + UINT32 cbCoFactor; + UINT32 cbScratch; + SYMCRYPT_ECPOINT_COORDINATES eCoordinates; +} SYMCRYPT_ECURVE_SIZES, *PSYMCRYPT_ECURVE_SIZES; +typedef const SYMCRYPT_ECURVE_SIZES * PCSYMCRYPT_ECURVE_SIZES; + +// Helper function which validates curve parameters and computes various buffer sizes. +static +BOOLEAN +SymCryptEcurveValidateAndComputeSizes( + _In_ PCSYMCRYPT_ECURVE_PARAMS pParams, + _Out_ PSYMCRYPT_ECURVE_SIZES pSizes ) +{ + BOOLEAN fSuccess = FALSE; + + // Check that the parameters are well formatted + SYMCRYPT_ASSERT( pParams != NULL ); + SYMCRYPT_ASSERT( (pParams->version == 1) || (pParams->version == 2) ); + SYMCRYPT_ASSERT( pParams->cbFieldLength != 0 ); + SYMCRYPT_ASSERT( pParams->cbSubgroupOrder != 0 ); + SYMCRYPT_ASSERT( pParams->cbCofactor != 0 ); + SYMCRYPT_ASSERT( (pParams->type == SYMCRYPT_ECURVE_TYPE_SHORT_WEIERSTRASS) || + (pParams->type == SYMCRYPT_ECURVE_TYPE_TWISTED_EDWARDS) || + (pParams->type == SYMCRYPT_ECURVE_TYPE_MONTGOMERY) ); + + // Reject inputs that are wildly big to avoid denial-of-service attacks. 
+ if ( pParams->cbFieldLength > SYMCRYPT_INTERNAL_MAX_ECURVE_SIZE/8 || + pParams->cbSubgroupOrder > SYMCRYPT_INTERNAL_MAX_ECURVE_SIZE / 8 + 1 || // subgroup can be > field prime + pParams->cbCofactor > 2 || // We support co-factor = 256 + pParams->cbSeed > 256 ) + { + goto cleanup; + } + + // Getting the # of digits of the various parameters + pSizes->nDigitsFieldLength = SymCryptDigitsFromBits( pParams->cbFieldLength * 8 ); + pSizes->nDigitsSubgroupOrder = SymCryptDigitsFromBits( pParams->cbSubgroupOrder * 8 ); + pSizes->nDigitsCoFactor = SymCryptDigitsFromBits( pParams->cbCofactor * 8 ); + + // ----------------------------------------------- + // Getting the byte sizes of different objects + // ----------------------------------------------- + pSizes->cbModulus = SymCryptSizeofModulusFromDigits( pSizes->nDigitsFieldLength ); + pSizes->cbSubgroupOrder = SymCryptSizeofModulusFromDigits( pSizes->nDigitsSubgroupOrder ); + pSizes->cbCoFactor = SymCryptSizeofIntFromDigits( pSizes->nDigitsCoFactor ); + + pSizes->cbModElement = SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( pParams->cbFieldLength * 8 ); + + // EcPoint: The curve is not initialized yet, we call the helper function. 
+ // It depends on the default format of each curve type + switch (pParams->type) + { + case (SYMCRYPT_ECURVE_TYPE_SHORT_WEIERSTRASS): + pSizes->eCoordinates = SYMCRYPT_ECPOINT_COORDINATES_JACOBIAN; + break; + case (SYMCRYPT_ECURVE_TYPE_TWISTED_EDWARDS): + pSizes->eCoordinates = SYMCRYPT_ECPOINT_COORDINATES_EXTENDED_PROJECTIVE; + break; + case (SYMCRYPT_ECURVE_TYPE_MONTGOMERY): + pSizes->eCoordinates = SYMCRYPT_ECPOINT_COORDINATES_SINGLE_PROJECTIVE; + break; + default: + goto cleanup; + } + + pSizes->cbEcpoint = SymCryptSizeofEcpointEx( pSizes->cbModElement, SYMCRYPT_INTERNAL_NUMOF_COORDINATES( pSizes->eCoordinates ) ); + // ----------------------------------------------- + + // Compute memory needed for the curve + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // Thus the following calculation does not overflow cbAlloc. + // + pSizes->cbAlloc = sizeof( SYMCRYPT_ECURVE ) + + pSizes->cbModulus + + 2 * pSizes->cbModElement + + pSizes->cbSubgroupOrder + + pSizes->cbCoFactor; + + if ( (pParams->type == SYMCRYPT_ECURVE_TYPE_SHORT_WEIERSTRASS) || + (pParams->type == SYMCRYPT_ECURVE_TYPE_TWISTED_EDWARDS) ) + { + // If the curve's type is short Weierstrass allocate space for 2^(w-2) ECPOINTs + // at the end of the curve's structure, where w is the width of the window. + // + // Note: The window width is fixed now. In later versions we can pass it in as a parameter. 
+ // SYMCRYPT_ASSERT( (1 << (SYMCRYPT_ECURVE_SW_DEF_WINDOW-2)) <= SYMCRYPT_ECURVE_SW_MAX_NPRECOMP_POINTS ); + pSizes->cbAlloc += (1 << (SYMCRYPT_ECURVE_SW_DEF_WINDOW-2))*pSizes->cbEcpoint; + } + else + { + // Otherwise just allocate space for just the distinguished point + pSizes->cbAlloc += pSizes->cbEcpoint; + } + + // Compute memory needed for internal scratch space + // EcpointSetValue and SymCryptOfflinePrecomputation + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // - SymCryptSizeofEcpointEx is bounded by 2^20 + // Thus the following calculation does not overflow cbScratch. + // + pSizes->cbScratch = SymCryptSizeofEcpointEx( pSizes->cbModElement, SYMCRYPT_ECPOINT_FORMAT_MAX_LENGTH ) + + 8 * pSizes->cbModElement + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pSizes->nDigitsFieldLength ), + SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( pSizes->nDigitsFieldLength ) ); + // IntToModulus( FMod and GOrd ) + pSizes->cbScratch = SYMCRYPT_MAX( pSizes->cbScratch, + SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS( SYMCRYPT_MAX(pSizes->nDigitsFieldLength, pSizes->nDigitsSubgroupOrder) ) ); + // ModElementSetValue( FMod ) + pSizes->cbScratch = SYMCRYPT_MAX( pSizes->cbScratch, + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pSizes->nDigitsFieldLength ) ); + + fSuccess = TRUE; + +cleanup: + return fSuccess; +} + +BOOLEAN +SYMCRYPT_CALL +SymCryptEcurveBufferSizesFromParams( + _In_ PCSYMCRYPT_ECURVE_PARAMS pParams, + _Out_ SIZE_T * pcbCurve, + _Out_ SIZE_T * pcbScratch ) +{ + BOOLEAN fSuccess = FALSE; + SYMCRYPT_ECURVE_SIZES sizes; + + if ( !SymCryptEcurveValidateAndComputeSizes( pParams, &sizes )) + { + goto cleanup; + } + + *pcbCurve = sizes.cbAlloc; + *pcbScratch = sizes.cbScratch; + + fSuccess = TRUE; + +cleanup: + return fSuccess; +} + +// Internal function which actually computes and writes curve into the given buffer. 
+// +// This is called internally by both SymCryptEcurveCreate() and SymCryptEcurveAllocate(). +static +PSYMCRYPT_ECURVE +SymCryptEcurveInitialize( + _In_ PCSYMCRYPT_ECURVE_PARAMS pParams, + _In_ UINT32 flags, + _In_ PCSYMCRYPT_ECURVE_SIZES pSizes, + _Out_writes_bytes_( pSizes->cbAlloc ) PBYTE pbCurve, + _Out_writes_bytes_( pSizes->cbScratch) PBYTE pbScratch ) +{ + BOOLEAN fSuccess = FALSE; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PSYMCRYPT_ECURVE pCurve = (PSYMCRYPT_ECURVE)pbCurve; + PBYTE pDst = NULL; // Destination pointer + PBYTE pSrc = NULL; // Source pointer + + PBYTE pSrcGenerator = NULL; // We have to set the generator point + // only after we have fully initialized the curve + + PSYMCRYPT_INT pTempInt = 0; + + PSYMCRYPT_MODELEMENT peTemp = NULL; + + PCSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION pcParamsV2Ext = NULL; + + UNREFERENCED_PARAMETER( flags ); + + // ----------------------------------------------- + // Populating the fields of the curve object + // ----------------------------------------------- + + // Version of curve structure + pCurve->version = SYMCRYPT_INTERNAL_ECURVE_VERSION_LATEST; + + // Type of curve + pCurve->type = (int) pParams->type; + + // Curve point format + pCurve->eCoordinates = pSizes->eCoordinates; + + // Number of digits of the field modulus + pCurve->FModDigits = pSizes->nDigitsFieldLength; + + // Number of digits of the group order + pCurve->GOrdDigits = pSizes->nDigitsSubgroupOrder; + + // Byte size of field elements + pCurve->FModBytesize = (UINT32)pParams->cbFieldLength; + + // Byte size of group elements + SYMCRYPT_ASSERT( pParams->cbSubgroupOrder < UINT32_MAX ); + pCurve->GOrdBytesize = (UINT32)pParams->cbSubgroupOrder; + + // Byte size of mod elements + pCurve->cbModElement = pSizes->cbModElement; + + // Total bytesize of the curve (used to free the curve object) + pCurve->cbAlloc = pSizes->cbAlloc; + + // Set destination and source pointers + pDst = ((PBYTE) pCurve) + sizeof( SYMCRYPT_ECURVE ); + pSrc = ((PBYTE) 
pParams) + sizeof( SYMCRYPT_ECURVE_PARAMS ); + + // Field Modulus + pCurve->FMod = SymCryptModulusCreate( pDst, pSizes->cbModulus, pSizes->nDigitsFieldLength ); + if ( pCurve->FMod == NULL ) + { + goto cleanup; + } + + pTempInt = SymCryptIntFromModulus( pCurve->FMod ); + if ( pTempInt == NULL) + { + goto cleanup; + } + + scError = SymCryptIntSetValue( pSrc, pParams->cbFieldLength, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pTempInt ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Field Modulus Bitsize + pCurve->FModBitsize = SymCryptIntBitsizeOfValue( pTempInt ); + if (pCurve->FModBitsize < SYMCRYPT_ECURVE_MIN_BITSIZE_FMOD) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + if( (SymCryptIntGetValueLsbits32( pTempInt ) & 1) == 0 ) + { + // 'Prime' must be odd to avoid errors in conversion to modulus + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // IntToModulus requirement: + // FModBitsize >= SYMCRYPT_ECURVE_MIN_BITSIZE_FMOD --> pTempInt > 0 + SymCryptIntToModulus( + pTempInt, + pCurve->FMod, + SYMCRYPT_INTERNAL_ECURVE_MODULUS_NUMOF_OPERATIONS( 8 * pParams->cbFieldLength ), + SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbScratch, + pSizes->cbScratch ); + + pDst += pSizes->cbModulus; + pSrc += pParams->cbFieldLength; + + // A constant + pCurve->A = SymCryptModElementCreate( pDst, pSizes->cbModElement, pCurve->FMod ); + if ( pCurve->A == NULL ) + { + goto cleanup; + } + scError = SymCryptModElementSetValue( + pSrc, + pParams->cbFieldLength, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + pCurve->FMod, + pCurve->A, + pbScratch, + pSizes->cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + pDst += pSizes->cbModElement; + pSrc += pParams->cbFieldLength; + + // B constant + pCurve->B = SymCryptModElementCreate( pDst, pSizes->cbModElement, pCurve->FMod ); + if ( pCurve->B == NULL ) + { + goto cleanup; + } + + // Detect Short-Weierstrass curves with A == -3 (NIST prime curves are all of this form) + 
// Use B's ModElement space for check + if( pParams->type == SYMCRYPT_ECURVE_TYPE_SHORT_WEIERSTRASS ) + { + SymCryptModElementSetValueNegUint32( + 3, + pCurve->FMod, + pCurve->B, + pbScratch, + pSizes->cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + if( SymCryptModElementIsEqual( pCurve->FMod, pCurve->A, pCurve->B ) ) + { + pCurve->type = SYMCRYPT_INTERNAL_ECURVE_TYPE_SHORT_WEIERSTRASS_AM3; + } + } + + // Set B to the correct value + scError = SymCryptModElementSetValue( + pSrc, + pParams->cbFieldLength, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + pCurve->FMod, + pCurve->B, + pbScratch, + pSizes->cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + pDst += pSizes->cbModElement; + pSrc += pParams->cbFieldLength; + + // Skip over the distinguished point until we fix all the parameters and scratch space sizes + pSrcGenerator = pSrc; + pSrc += pParams->cbFieldLength * 2; + + // Subgroup Order + pCurve->GOrd = SymCryptModulusCreate( pDst, pSizes->cbSubgroupOrder, pSizes->nDigitsSubgroupOrder ); + if ( pCurve->GOrd == NULL ) + { + goto cleanup; + } + + pTempInt = SymCryptIntFromModulus( pCurve->GOrd ); + if ( pTempInt == NULL) + { + goto cleanup; + } + + scError = SymCryptIntSetValue( pSrc, pParams->cbSubgroupOrder, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, pTempInt ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Subgroup Order Bitsize + pCurve->GOrdBitsize = SymCryptIntBitsizeOfValue( pTempInt ); + if (pCurve->GOrdBitsize < SYMCRYPT_ECURVE_MIN_BITSIZE_GORD) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + if( (SymCryptIntGetValueLsbits32( pTempInt ) & 1) == 0 ) + { + // 'Prime' must be odd to avoid errors in conversion to modulus + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // IntToModulus requirement: + // GOrdBitsize >= SYMCRYPT_ECURVE_MIN_BITSIZE_GORD --> pTempInt > 0 + SymCryptIntToModulus( + pTempInt, + pCurve->GOrd, + 
SYMCRYPT_INTERNAL_ECURVE_GROUP_ORDER_NUMOF_OPERATIONS, + SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbScratch, + pSizes->cbScratch ); + + pDst += pSizes->cbSubgroupOrder; + pSrc += pParams->cbSubgroupOrder; + + // Cofactor + pCurve->H = SymCryptIntCreate( pDst, pSizes->cbCoFactor, pSizes->nDigitsCoFactor ); + if ( pCurve->H == NULL ) + { + goto cleanup; + } + scError = SymCryptIntSetValue( + pSrc, + pParams->cbCofactor, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + pCurve->H ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + + // Make sure that the cofactor is not zero or too big + pCurve->coFactorPower = SymCryptIntBitsizeOfValue( pCurve->H ) - 1; + if (pCurve->coFactorPower == (UINT32)-1 || pCurve->coFactorPower > SYMCRYPT_ECURVE_MAX_COFACTOR_POWER) + { + goto cleanup; + } + + // Validate that the cofactor is a power of two + if (!SymCryptIntIsEqualUint32( pCurve->H, 1<<(pCurve->coFactorPower) )) + { + goto cleanup; + } + + pDst += pSizes->cbCoFactor; + pSrc += pParams->cbCofactor; + + // Calculate scratch spaces' sizes + if (pParams->type == SYMCRYPT_ECURVE_TYPE_SHORT_WEIERSTRASS) + { + pCurve->info.sw.window = SYMCRYPT_ECURVE_SW_DEF_WINDOW; + pCurve->info.sw.nPrecompPoints = (1 << (SYMCRYPT_ECURVE_SW_DEF_WINDOW-2)); + pCurve->info.sw.nRecodedDigits = pCurve->GOrdBitsize + 1; // This is the maximum - used by the wNAF Interleaving method + } + else if ( pParams->type == SYMCRYPT_ECURVE_TYPE_TWISTED_EDWARDS ) + { + pCurve->info.sw.window = SYMCRYPT_ECURVE_SW_DEF_WINDOW; + pCurve->info.sw.nPrecompPoints = (1 << (SYMCRYPT_ECURVE_SW_DEF_WINDOW-2)); + pCurve->info.sw.nRecodedDigits = pCurve->GOrdBitsize + 1; // This is the maximum - used by the wNAF Interleaving method + } + + SymCryptEcurveFillScratchSpaces(pCurve); + + // Now set the distinguished point + pCurve->G = SymCryptEcpointCreate( pDst, pSizes->cbEcpoint, pCurve ); + if ( pCurve->G == NULL ) + { + goto cleanup; + } + scError = SymCryptEcpointSetValue( + pCurve, + pSrcGenerator, + 
pParams->cbFieldLength * 2, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + SYMCRYPT_ECPOINT_FORMAT_XY, + pCurve->G, + SYMCRYPT_FLAG_DATA_PUBLIC, + pbScratch, + pSizes->cbScratch ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + pDst += pSizes->cbEcpoint; + + // Fill the precomputed table + if ( (pParams->type == SYMCRYPT_ECURVE_TYPE_SHORT_WEIERSTRASS) || + (pParams->type == SYMCRYPT_ECURVE_TYPE_TWISTED_EDWARDS) ) + { + // The first point of the table is the generator + pCurve->info.sw.poPrecompPoints[0] = pCurve->G; + + for (UINT32 i=1; i<pCurve->info.sw.nPrecompPoints; i++) + { + pCurve->info.sw.poPrecompPoints[i] = SymCryptEcpointCreate( pDst, pSizes->cbEcpoint, pCurve ); + if ( pCurve->info.sw.poPrecompPoints[i] == NULL ) + { + goto cleanup; + } + pDst += pSizes->cbEcpoint; + } + + SymCryptOfflinePrecomputation( pCurve, pbScratch, pSizes->cbScratch ); + } + + // For Montgomery curve, we calculate A = (A + 2) / 4 + if (pParams->type == SYMCRYPT_ECURVE_TYPE_MONTGOMERY) + { + peTemp = SymCryptModElementCreate( pbScratch, pSizes->cbModElement, pCurve->FMod ); + + // SetValueUint32 requirements: + // FMod > 2 since it has more than SYMCRYPT_ECURVE_MIN_BITSIZE_FMOD bits + SymCryptModElementSetValueUint32( 2, pCurve->FMod, peTemp, pbScratch + pSizes->cbModElement, pSizes->cbScratch - pSizes->cbModElement ); + SymCryptModAdd (pCurve->FMod, pCurve->A, peTemp, pCurve->A, pbScratch + pSizes->cbModElement, pSizes->cbScratch - pSizes->cbModElement ); // A = A + 2; + SymCryptModDivPow2( pCurve->FMod, pCurve->A, 2, pCurve->A, pbScratch + pSizes->cbModElement, pSizes->cbScratch - pSizes->cbModElement ); // A = (A + 2) / 4 + } + + // Set the default curve policy for parameters of version 2 + if (pParams->version == 2) + { + // Skip over the seed (if any) + pSrc += pParams->cbSeed; + + // Copy the extension info (it can be unaligned) + pcParamsV2Ext = (PCSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION) pSrc; + } + else + { + // Set the defaults for version 1 + if (pParams->type == 
SYMCRYPT_ECURVE_TYPE_SHORT_WEIERSTRASS) + { + pcParamsV2Ext = SymCryptEcurveParamsV2ExtensionShortWeierstrass; + } + else if ( pParams->type == SYMCRYPT_ECURVE_TYPE_TWISTED_EDWARDS ) + { + pcParamsV2Ext = SymCryptEcurveParamsV2ExtensionTwistedEdwards; + } + else if ( pParams->type == SYMCRYPT_ECURVE_TYPE_MONTGOMERY ) + { + pcParamsV2Ext = SymCryptEcurveParamsV2ExtensionMontgomery; + } + } + + pCurve->PrivateKeyDefaultFormat = pcParamsV2Ext->PrivateKeyDefaultFormat; + pCurve->HighBitRestrictionNumOfBits = pcParamsV2Ext->HighBitRestrictionNumOfBits; + pCurve->HighBitRestrictionPosition = pcParamsV2Ext->HighBitRestrictionPosition; + pCurve->HighBitRestrictionValue = pcParamsV2Ext->HighBitRestrictionValue; + + // Make sure that the HighBitRestrictions make sense + // (see SymCryptIntGet/SetBits) + if ( (pCurve->HighBitRestrictionNumOfBits>32) || + ((pCurve->HighBitRestrictionNumOfBits>0) && + (pCurve->HighBitRestrictionPosition + pCurve->HighBitRestrictionNumOfBits > pCurve->GOrdBitsize + pCurve->coFactorPower)) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Setting the magic + SYMCRYPT_SET_MAGIC( pCurve ); + + fSuccess = TRUE; + +cleanup: + if (!fSuccess) + { + SymCryptWipe( pbCurve, pSizes->cbAlloc ); + pCurve = NULL; + } + + return pCurve; +} + +PSYMCRYPT_ECURVE +SYMCRYPT_CALL +SymCryptEcurveCreate( + _In_ PSYMCRYPT_ECURVE_PARAMS pParams, + _In_ UINT32 flags, + _Out_writes_bytes_( cbCurve ) PBYTE pbCurve, + SIZE_T cbCurve, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch) +{ + SYMCRYPT_ECURVE_SIZES sizes; + + PSYMCRYPT_ECURVE pCurve = NULL; + + if ( !SymCryptEcurveValidateAndComputeSizes(pParams, &sizes) ) + { + goto cleanup; + } + + if ( cbCurve < sizes.cbAlloc ) + { + goto cleanup; + } + + if ( cbScratch < sizes.cbScratch ) + { + goto cleanup; + } + + pCurve = SymCryptEcurveInitialize( pParams, flags, &sizes, pbCurve, pbScratch ); + +cleanup: + return pCurve; +} + +PSYMCRYPT_ECURVE +SYMCRYPT_CALL 
+SymCryptEcurveAllocate( + _In_ PCSYMCRYPT_ECURVE_PARAMS pParams, + _In_ UINT32 flags ) +{ + SYMCRYPT_ECURVE_SIZES sizes; + + PBYTE pbCurve = NULL; + PBYTE pbScratch = NULL; + + PSYMCRYPT_ECURVE pCurve = NULL; + + if ( !SymCryptEcurveValidateAndComputeSizes(pParams, &sizes) ) + { + goto cleanup; + } + + pbCurve = SymCryptCallbackAlloc( sizes.cbAlloc ); + if ( pbCurve == NULL ) + { + goto cleanup; + } + + pbScratch = SymCryptCallbackAlloc( sizes.cbScratch ); + if ( pbScratch == NULL ) + { + goto cleanup; + } + + pCurve = SymCryptEcurveInitialize( pParams, flags, &sizes, pbCurve, pbScratch ); + if ( pCurve != NULL ) + { + pbCurve = NULL; + } + +cleanup: + if ( pbScratch != NULL ) + { + SymCryptWipe( pbScratch, sizes.cbScratch ); + SymCryptCallbackFree( pbScratch ); + } + + if ( pbCurve != NULL ) + { + SymCryptCallbackFree( pbCurve ); + } + + return pCurve; +} + +VOID +SYMCRYPT_CALL +SymCryptEcurveFree( _Out_ PSYMCRYPT_ECURVE pCurve ) +{ + SYMCRYPT_CHECK_MAGIC( pCurve ); + + SymCryptWipe( (PBYTE) pCurve, pCurve->cbAlloc ); + + SymCryptCallbackFree( pCurve ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveBitsizeofFieldModulus( _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + return pCurve->FModBitsize; +} + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveBitsizeofGroupOrder( _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + return pCurve->GOrdBitsize; +} + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveDigitsofFieldElement( _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + return pCurve->FModDigits; +} + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveSizeofFieldElement( _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + return pCurve->FModBytesize; +} + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveSizeofScalarMultiplier( _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + return pCurve->GOrdBytesize; +} + +PCSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptEcurveGroupOrder( _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + return pCurve->GOrd; +} + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveDigitsofScalarMultiplier( _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + return SymCryptDigitsFromBits( 
pCurve->GOrdBitsize + pCurve->coFactorPower ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptEcurvePrivateKeyDefaultFormat( _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + return pCurve->PrivateKeyDefaultFormat; +} + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveHighBitRestrictionNumOfBits( _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + return pCurve->HighBitRestrictionNumOfBits; +} + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveHighBitRestrictionPosition( _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + return pCurve->HighBitRestrictionPosition; +} + +UINT32 +SYMCRYPT_CALL +SymCryptEcurveHighBitRestrictionValue( _In_ PCSYMCRYPT_ECURVE pCurve ) +{ + return pCurve->HighBitRestrictionValue; +} + +BOOLEAN +SYMCRYPT_CALL +SymCryptEcurveIsSame( + _In_ PCSYMCRYPT_ECURVE pCurve1, + _In_ PCSYMCRYPT_ECURVE pCurve2) +{ + BOOLEAN fIsSameCurve = FALSE; + + if ( pCurve1 == pCurve2 ) + { + fIsSameCurve = TRUE; /* identical pointers: trivially the same curve */ + goto cleanup; + } + + if ( (pCurve1->type != pCurve2->type) || + !SymCryptIntIsEqual ( + SymCryptIntFromModulus( pCurve1->FMod ), + SymCryptIntFromModulus( pCurve2->FMod ) ) || + !SymCryptModElementIsEqual ( pCurve1->FMod, pCurve1->A, pCurve2->A ) || + !SymCryptModElementIsEqual ( pCurve1->FMod, pCurve1->B, pCurve2->B ) ) + { + goto cleanup; /* type, field modulus, A or B differs */ + } + + fIsSameCurve = TRUE; /* type, field modulus, A and B all match; no other field is compared here */ + +cleanup: + return fIsSameCurve; +} diff --git a/libs/symcrypt/lib/env_windowsUserModeWin8_1.c b/libs/symcrypt/lib/env_windowsUserModeWin8_1.c new file mode 100644 index 00000000000..ae66963b264 --- /dev/null +++ b/libs/symcrypt/lib/env_windowsUserModeWin8_1.c @@ -0,0 +1,187 @@ +// +// env_windowsUserModeWin8_1.c +// Platform-specific code for windows user mode. +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+// + +//#include "precomp.h" + +#pragma warning(push) +#pragma warning(disable: 5103) // Arm64's wdm.h (here: the windows.h include below) currently generates a lot of 5103 warnings +#include <windows.h> +#pragma warning(pop) +#include "symcrypt.h" +#include "sc_lib.h" + +SYMCRYPT_CPU_FEATURES SYMCRYPT_CALL SymCryptCpuFeaturesNeverPresentEnvWindowsUsermodeWin8_1nLater() +{ + return 0; +} + +VOID +SYMCRYPT_CALL +SymCryptInitEnvWindowsUsermodeWin8_1nLater( UINT32 version ) +{ + if( g_SymCryptFlags & SYMCRYPT_FLAG_LIB_INITIALIZED ) + { + return; // the initialized flag is already set: nothing to do + } + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + // + // First we detect what the CPU has + // + SymCryptDetectCpuFeaturesByCpuid( SYMCRYPT_CPUID_DETECT_FLAG_CHECK_OS_SUPPORT_FOR_YMM ); + + // + // We also need to be sure that the OS supports the extended registers. + // + { + ULONGLONG FeatureMask = GetEnabledXStateFeatures(); + + if( !(FeatureMask & XSTATE_MASK_AVX) ) + { + g_SymCryptCpuFeaturesNotPresent |= SYMCRYPT_CPU_FEATURE_AVX2; // AVX state not enabled by the OS: mark AVX2 as not present + } + + if( !(FeatureMask & XSTATE_MASK_AVX512) ) + { + g_SymCryptCpuFeaturesNotPresent |= SYMCRYPT_CPU_FEATURE_AVX512; // no AVX512 state bit enabled by the OS: mark AVX512 as not present + } + } + + // + // Our SaveXmm function never fails because it doesn't have to do anything in User mode.
+ // + g_SymCryptCpuFeaturesNotPresent &= ~SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL; // clears the bit, i.e. SAVEXMM_NOFAIL is reported as present + +#elif SYMCRYPT_CPU_ARM + + g_SymCryptCpuFeaturesNotPresent = (SYMCRYPT_CPU_FEATURES) ~SYMCRYPT_CPU_FEATURE_NEON; // everything except NEON is marked as not present + +#elif SYMCRYPT_CPU_ARM64 + + SymCryptDetectCpuFeaturesFromIsProcessorFeaturePresent(); + +#endif + + SymCryptInitEnvCommon( version ); +} + +_Analysis_noreturn_ +VOID +SYMCRYPT_CALL +SymCryptFatalEnvWindowsUsermodeWin8_1nLater( UINT32 fatalCode ) +{ + UINT32 fatalCodeVar; + + SymCryptFatalIntercept( fatalCode ); + + // + // Put the fatal code in a location where it shows up in the dump + // + SYMCRYPT_FORCE_WRITE32( &fatalCodeVar, fatalCode ); + + // + // Our first preference is to fastfail, + // the second to create an AV, which triggers a Watson report so that we get to + // see what is going wrong. + // + __fastfail( FAST_FAIL_CRYPTO_LIBRARY ); + + // + // Next we write to the NULL pointer; this causes an AV + // + SYMCRYPT_FORCE_WRITE32( (volatile UINT32 *)NULL, fatalCode ); + + // + // If that fails, we terminate the process. (This function call also ensures that this environment is actually + // used in user mode and not some other environment.) + // (During testing we had the TerminateProcess as the first option, but that makes debugging very hard as + // it leaves no traces of what went wrong.) + // + TerminateProcess( GetCurrentProcess(), fatalCode ); + + SymCryptFatalHang( fatalCode ); // last resort if every attempt above returned +} + +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSaveXmmEnvWindowsUsermodeWin8_1nLater( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ) +{ + // + // In usermode there is no need to save XMM registers. + // The compiler should inline this function and optimize it away. + // + + UNREFERENCED_PARAMETER( pSaveArea ); + + return SYMCRYPT_NO_ERROR; +} + +VOID +SYMCRYPT_CALL +SymCryptRestoreXmmEnvWindowsUsermodeWin8_1nLater( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ) +{ + // + // In usermode there is no need to save or restore XMM registers.
+ // The compiler should inline this function and optimize it away. + // + + UNREFERENCED_PARAMETER( pSaveArea ); +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSaveYmmEnvWindowsUsermodeWin8_1nLater( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ) +{ + // + // In usermode there is no need to save YMM registers. + // The compiler should inline this function and optimize it away. + // + + UNREFERENCED_PARAMETER( pSaveArea ); + + return SYMCRYPT_NO_ERROR; +} + +VOID +SYMCRYPT_CALL +SymCryptRestoreYmmEnvWindowsUsermodeWin8_1nLater( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveArea ) +{ + // + // In usermode there is no need to save or restore YMM registers. + // The compiler should inline this function and optimize it away. + // + + UNREFERENCED_PARAMETER( pSaveArea ); +} + +#endif + +VOID +SYMCRYPT_CALL +SymCryptTestInjectErrorEnvWindowsUsermodeWin8_1nLater( PBYTE pbBuf, SIZE_T cbBuf ) +{ + // + // This feature is only used during testing. In production it is always + // an empty function that the compiler can optimize away. + // + UNREFERENCED_PARAMETER( pbBuf ); + UNREFERENCED_PARAMETER( cbBuf ); +} + +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 + +VOID +SYMCRYPT_CALL +SymCryptCpuidExFuncEnvWindowsUsermodeWin8_1nLater( int cpuInfo[4], int function_id, int subfunction_id ) +{ + __cpuidex( cpuInfo, function_id, subfunction_id ); // forwards all three arguments unchanged to __cpuidex +} + +#endif diff --git a/libs/symcrypt/lib/equal.c b/libs/symcrypt/lib/equal.c new file mode 100644 index 00000000000..eec4c804fb1 --- /dev/null +++ b/libs/symcrypt/lib/equal.c @@ -0,0 +1,48 @@ +// +// equal.c Memory comparison routine that is safe against side channels. +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+// + +#include "precomp.h" + +BOOLEAN +SYMCRYPT_CALL +SymCryptEqual( _In_reads_( cbBytes ) PCBYTE pbSrc1, + _In_reads_( cbBytes ) PCBYTE pbSrc2, + SIZE_T cbBytes ) +{ + UINT32 neq = 0; // ORs together the XOR of every compared word/byte; stays 0 only if the buffers are equal + BYTE b; + volatile BYTE * p1 = (volatile BYTE *) pbSrc1; + volatile BYTE * p2 = (volatile BYTE *) pbSrc2; + + // + // We use forced-access memory reads to ensure that the compiler doesn't get + // smart and implement an early-out solution. + // + + while( cbBytes >= 4 ) // compare 4 bytes at a time while at least 4 remain + { + neq |= SYMCRYPT_FORCE_READ32( (volatile UINT32 *) p1 ) ^ SYMCRYPT_FORCE_READ32( (volatile UINT32 *) p2 ); + p1 += 4; + p2 += 4; + cbBytes -= 4; + } + + // We have to deal with the remaining bytes using a separate accumulator to work around an issue in the ARM64 compiler. + if( cbBytes > 0 ) + { + b = 0; + while( cbBytes > 0 ) // at most 3 trailing bytes are left here + { + b |= SYMCRYPT_FORCE_READ8( p1 ) ^ SYMCRYPT_FORCE_READ8( p2 ); + p1++; + p2++; + cbBytes--; + } + neq |= b; + } + + return neq == 0; // nonzero result iff no differing bit was seen (so also for cbBytes == 0) +} diff --git a/libs/symcrypt/lib/fdef_general.c b/libs/symcrypt/lib/fdef_general.c new file mode 100644 index 00000000000..fe18eaac5f5 --- /dev/null +++ b/libs/symcrypt/lib/fdef_general.c @@ -0,0 +1,1550 @@ +// +// fdef_general.c General functions of the default format. +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+// +// +// + +#include "precomp.h" + +#include "smallPrimes32.h" // For SymCryptTestTrialdivisionMaxSmallPrime + +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 + +#define SYMCRYPT_TRIALDIVISION_DIGIT_REDUCTION_CYCLES (16) // Measured on amd64 +#define SYMCRYPT_TRIALDIVISION_DIVIDE_TEST_CYCLES (2) // Measured on amd64 +#define SYMCRYPT_RABINMILLER_DIGIT_CYCLES (43000) // Measured on amd64 + +#elif SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM + +#define SYMCRYPT_TRIALDIVISION_DIGIT_REDUCTION_CYCLES (18) // Measured on x86 +#define SYMCRYPT_TRIALDIVISION_DIVIDE_TEST_CYCLES (16) // Measured on x86 +#define SYMCRYPT_RABINMILLER_DIGIT_CYCLES (25300) // Measured on x86 + +#else + +#define SYMCRYPT_TRIALDIVISION_DIGIT_REDUCTION_CYCLES (18) // Measured on x86; reused as the default for other CPUs +#define SYMCRYPT_TRIALDIVISION_DIVIDE_TEST_CYCLES (16) // Measured on x86; reused as the default for other CPUs +#define SYMCRYPT_RABINMILLER_DIGIT_CYCLES (25300) // Measured on x86; reused as the default for other CPUs + +#endif + + +#define SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME (1<<22) // Some large limit to bound memory usage +C_ASSERT( SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME <= UINT32_MAX ); +C_ASSERT( SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME == ((UINT32) SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME) ); + +VOID +SYMCRYPT_CALL +SymCryptFdefMaskedCopyC( + _In_reads_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PCBYTE pbSrc, + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbDst, + UINT32 nDigits, + UINT32 mask ) + /* + This function is dangerous, and would create a buffer overflow if nDigits exceeds the number of digits pbDst can hold + It is only called by SymCryptFdefMaskedCopy when none of the CPU-specific (assembly) builds is selected. Consider removing it if it is not needed. + */ +{ + UINT64 m64 = (UINT64)0 - (mask & 1); // all-ones if bit 0 of mask is set, 0 otherwise + PUINT64 pSrc = (PUINT64) pbSrc; // should be a const pointer to match pbSrc + PUINT64 pDst = (PUINT64) pbDst; + SIZE_T i; + + // This allows 0xffffffff and 0, is that what you wanted? + // If so, ( mask == 0xffffffff || mask == 0 ) + // would be more readable. It is also odd that 1 is not valid, but it results in exactly the + // same code flow as ~0.
+ SYMCRYPT_ASSERT( (mask + 1) < 2 ); // Check that mask is valid: only 0 and 0xffffffff pass + + // This - nDigits * SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT64 ) + // seems to occur often. Consider a macro with a name that explains what you are doing + // A comment on the macro which explains why this multiplication is never a problem would be + // helpful - I'm fairly sure it is not a problem. + for( i=0; i< nDigits * SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT64 ); i += 2 ) // two UINT64 words per iteration + { + pDst[i ] = (pSrc[i ] & m64) | (pDst[i ] & ~m64 ); + pDst[i+1] = (pSrc[i+1] & m64) | (pDst[i+1] & ~m64 ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptFdefMaskedCopy( + _In_reads_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PCBYTE pbSrc, + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbDst, + UINT32 nDigits, + UINT32 mask ) +{ +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_ARM + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbSrc ); + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbDst ); + SymCryptFdefMaskedCopyAsm( pbSrc, pbDst, nDigits, mask ); +#else + SymCryptFdefMaskedCopyC( pbSrc, pbDst, nDigits, mask ); // portable fallback when none of the CPUs above is selected +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptFdefConditionalSwapC( + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbSrc1, + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbSrc2, + UINT32 nDigits, + UINT32 cond ) +{ + /* + cond must be 0 or 1 (asserted below): 1 exchanges the two buffers, 0 leaves both unchanged; the same operations run in both cases. + */ + UINT64 m64 = (UINT64)0 - (cond & 1); + PUINT64 pSrc1 = (PUINT64) pbSrc1; + PUINT64 pSrc2 = (PUINT64) pbSrc2; + UINT64 tmp1 = 0; + UINT64 tmp2 = 0; + SIZE_T i; + + // Unlike the previous function, this only allows 0 and 1; why?
+ SYMCRYPT_ASSERT( cond < 2 ); // Check that the condition is valid (0 or 1) + + for( i=0; i< nDigits * SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT64 ); i += 2 ) + { + tmp1 = (pSrc1[i ] ^ pSrc2[i ]) & m64; // XOR difference of the two words when swapping, 0 otherwise + tmp2 = (pSrc1[i+1] ^ pSrc2[i+1]) & m64; + + pSrc1[i ] ^= tmp1; pSrc2[i ] ^= tmp1; + pSrc1[i+1] ^= tmp2; pSrc2[i+1] ^= tmp2; + } +} + +VOID +SYMCRYPT_CALL +SymCryptFdefConditionalSwap( + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbSrc1, + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbSrc2, + UINT32 nDigits, + UINT32 cond ) +{ + SymCryptFdefConditionalSwapC( pbSrc1, pbSrc2, nDigits, cond ); +} + + +UINT32 +SymCryptFdefDigitsFromBits( UINT32 nBits ) +{ + UINT32 res; + + if( nBits == 0 ) + { + res = 1; // a zero-bit request still gets one digit + } + else + { + SYMCRYPT_ASSERT( nBits <= SYMCRYPT_INT_MAX_BITS ); + + // Callers with integers larger than SYMCRYPT_INT_MAX_BITS should not occur in real use cases. + // To avoid overflow issues, return 0 digits to indicate an error which can be handled by + // callers, or flow through into object allocation which will in turn recognize the invalid + // digit count. + if( nBits > SYMCRYPT_INT_MAX_BITS ) + { + res = 0; + } else { + res = SYMCRYPT_FDEF_DIGITS_FROM_BITS( nBits ); + } + } + + return res; +} + +// Let's limit max bits to the number of bits we actually test +C_ASSERT( SYMCRYPT_INT_MAX_BITS < (1 << 30) ); // Larger values can cause overflows and sign confusion + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptFdefIntAllocate( UINT32 nDigits ) +{ + PVOID p = NULL; + UINT32 cb; + PSYMCRYPT_INT res = NULL; + + // + // The nDigits requirements are enforced by SymCryptFdefSizeofIntFromDigits. Thus + // the result does not overflow and is upper bounded by 2^18.
+ // + cb = SymCryptFdefSizeofIntFromDigits( nDigits ); + + if( cb != 0 ) // cb == 0 signals an invalid digit count; p then stays NULL + { + p = SymCryptCallbackAlloc( cb ); + } + + if( p == NULL ) + { + goto cleanup; // invalid digit count or allocation failure: return NULL + } + + res = SymCryptIntCreate( p, cb, nDigits ); + +cleanup: + return res; +} + + +UINT32 +SYMCRYPT_CALL +SymCryptFdefSizeofIntFromDigits( UINT32 nDigits ) +{ + SYMCRYPT_ASSERT( nDigits != 0 ); + SYMCRYPT_ASSERT( nDigits <= SYMCRYPT_FDEF_UPB_DIGITS ); + + // Ensure we do not overflow the following calculation when provided with invalid inputs + if( nDigits == 0 || nDigits > SYMCRYPT_FDEF_UPB_DIGITS ) + { + return 0; // 0 is the error value for an invalid digit count + } + + // Note: ti stands for 'Type-Int' and it helps catch type errors when type-casting macros are used. + return SYMCRYPT_FIELD_OFFSET( SYMCRYPT_INT, ti ) + nDigits * SYMCRYPT_FDEF_DIGIT_SIZE; +} + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptFdefIntCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ) +{ + PSYMCRYPT_INT pInt = NULL; + UINT32 cb = SymCryptFdefSizeofIntFromDigits( nDigits ); + + SYMCRYPT_ASSERT( cb >= sizeof(SYMCRYPT_INT) ); + SYMCRYPT_ASSERT( cbBuffer >= cb ); + if( (cb == 0) || (cbBuffer < cb) ) + { + goto cleanup; // return NULL + } + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + pInt = (PSYMCRYPT_INT) pbBuffer; + + pInt->type = 'gI' << 16; // multi-character constant used as the object type tag + pInt->nDigits = nDigits; + + // + // The nDigits requirements are enforced by SymCryptFdefSizeofIntFromDigits. Thus + // the result does not overflow and is upper bounded by 2^18. + // + pInt->cbSize = cb; + + SYMCRYPT_SET_MAGIC( pInt ); + +cleanup: + return pInt; +} + + +VOID +SymCryptFdefIntCopyFixup( + _In_ PCSYMCRYPT_INT pSrc, + _Out_ PSYMCRYPT_INT pDst ) +{ + UNREFERENCED_PARAMETER( pSrc ); + UNREFERENCED_PARAMETER( pDst ); // pDst is only used by SYMCRYPT_SET_MAGIC, presumably a no-op in FRE builds - TODO confirm
+ + SYMCRYPT_SET_MAGIC( pDst ); +} + +VOID +SymCryptFdefIntCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_CHECK_MAGIC( piSrc ); + SYMCRYPT_CHECK_MAGIC( piDst ); + + SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits ); + + // + // in-place copy is somewhat common, and addresses are always public, so we can test for a no-op copy. + // + if( piSrc != piDst ) + { + // memcpy is normally considered a banned, unsafe function. It is used here on the assumption that + // equal nDigits (asserted above) means equal SYMCRYPT_OBJ_NBYTES for both objects - TODO confirm. + memcpy( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_FDEF_INT_PUINT32( piSrc ), SYMCRYPT_OBJ_NBYTES( piDst )); + } +} + +VOID +SymCryptFdefIntMaskedCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Inout_ PSYMCRYPT_INT piDst, + UINT32 mask ) + /* + mask is forwarded unchanged to SymCryptFdefMaskedCopy; the C fallback accepts 0 (keep piDst) or 0xffffffff (copy piSrc into piDst). + */ +{ + SYMCRYPT_CHECK_MAGIC( piSrc ); + SYMCRYPT_CHECK_MAGIC( piDst ); + + SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits ); + + SymCryptFdefMaskedCopy( (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piSrc ), (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piDst ), piSrc->nDigits, mask ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntConditionalCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Inout_ PSYMCRYPT_INT piDst, + UINT32 cond ) +{ + SYMCRYPT_CHECK_MAGIC( piSrc ); + SYMCRYPT_CHECK_MAGIC( piDst ); + + SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits ); + + SymCryptFdefMaskedCopy( (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piSrc ), (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piDst ), piSrc->nDigits, SYMCRYPT_MASK32_NONZERO( cond ) ); // cond is turned into a mask first (presumably all-ones for any nonzero cond - TODO confirm) +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntConditionalSwap( + _Inout_ PSYMCRYPT_INT piSrc1, + _Inout_ PSYMCRYPT_INT piSrc2, + UINT32 cond ) +{ + SYMCRYPT_CHECK_MAGIC( piSrc1 ); + SYMCRYPT_CHECK_MAGIC( piSrc2 ); + + SYMCRYPT_ASSERT( piSrc1->nDigits == piSrc2->nDigits ); + + SymCryptFdefConditionalSwap( (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), (PBYTE) SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), piSrc1->nDigits, cond ); // cond is passed through as is; the C implementation asserts it is 0 or 1 +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntBitsizeOfObject( _In_
PCSYMCRYPT_INT piSrc ) +{ + // This does not overflow since the nDigits field is + // bounded by SYMCRYPT_FDEF_UPB_DIGITS. + return SYMCRYPT_FDEF_DIGIT_BITS * piSrc->nDigits; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefNumberofDigitsFromInt( _In_ PCSYMCRYPT_INT piSrc ) +{ + return piSrc->nDigits; +} + +SYMCRYPT_ERROR +SymCryptFdefIntCopyMixedSize( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ) +{ + UINT32 n; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_CHECK_MAGIC( piSrc ); + SYMCRYPT_CHECK_MAGIC( piDst ); + + // in-place copy is somewhat common, and addresses are always public, so we can test for a no-op copy. + if( piSrc == piDst ) + { + goto cleanup; + } + + // + // Copy the digits that are available in both + // + n = SYMCRYPT_MIN( piSrc->nDigits, piDst->nDigits ); + memcpy( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_FDEF_INT_PUINT32( piSrc ), n * SYMCRYPT_FDEF_DIGIT_SIZE ); + + if( piDst->nDigits > n ) // destination is larger: wipe its extra high digits + { + SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piDst )[n * SYMCRYPT_FDEF_DIGIT_NUINT32], (piDst->nDigits - n) * SYMCRYPT_FDEF_DIGIT_SIZE ); + } + + if( piSrc->nDigits > n ) // source is larger: its extra high digits must all be zero + { + // Check that the rest of the source is zero + PUINT64 p = (PUINT64) &SYMCRYPT_FDEF_INT_PUINT32( piSrc )[n * SYMCRYPT_FDEF_DIGIT_NUINT32]; + UINT64 v = 0; + UINT32 i = (piSrc->nDigits - n) * SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT64 ); + while( i > 0 ) + { + v |= *p++; + i--; + } + + // + // If the Src doesn't fit, we are allowed to publish that fact, so we can use an IF. + // + if( v != 0 ) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; // note: the low n digits have already been copied into piDst at this point + goto cleanup; + } + } + +cleanup: + return scError; +} + + +UINT32 +SYMCRYPT_CALL +SymCryptFdefBitsizeOfUint32( UINT32 v ) +{ + UINT32 res; + UINT32 mask; + UINT32 vUpper; + UINT32 vBit1; + + // This is tricky to do side-channel safe using only defined behaviour of the C language. + + // This is very difficult to make any sense of.
A comment containing the C code that one would normally + // write to do the same thing would be helpful. I will need to come back to this. + // Also, there is no test coverage of this function. There should be a unit test to show that it does the same thing + // as the code one would normally write. + + vUpper = v & 0xffff0000; + mask = (UINT32) ( (0 -(UINT64)(vUpper)) >> 32 ); // mask = 0 or 0xffffffff + res = mask & 16; // not a bit test: contributes 16 to the MS bit index when any of the upper 16 bits is set + v = ((v & 0xffff) & ~mask) | ((vUpper >> 16) & mask); // keep whichever 16-bit half holds the MS bit + + vUpper = v & 0xff00; + mask = (0 - vUpper) >> 16; // mask = 0 or 0xffff + res |= mask & 8; + v = ((v & 0xff) & ~mask) | ((v >> 8) & mask); + + vUpper = v & 0xf0; + mask = (0 - vUpper) >> 16; // mask = 0 or 0xffff + res |= mask & 4; + v = ((v & 0xf) & ~mask) | ((v >> 4) & mask ); + + vUpper = v & 0xc; + mask = (0 - vUpper) >> 16; // mask = 0 or 0xffff + res |= mask & 2; + v = ((v & 0x3) & ~mask) | ((v >> 2) & mask); + + // + // Only 2 bits left. + // + vBit1 = (v >> 1) & 1; + res |= vBit1; + + // + // Now we have the bit number of the MSbit set in res. + // We need to increase this by one if v was nonzero, so that we + // get 0 for v==0, and the # bits needed for v > 0 + // + res += (v | vBit1) & 1; + + return res; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntBitsizeOfValue( _In_ PCSYMCRYPT_INT piSrc ) +{ + UINT32 nUint32 = SYMCRYPT_OBJ_NUINT32( piSrc ); + + UINT32 res = 0; + UINT32 msNonzeroWord = 0; // most significant nonzero 32-bit word + UINT32 searchingMask = SYMCRYPT_MASK32_SET; // Set if still searching, 0 otherwise + UINT32 d; + UINT32 dIsNonzeroMask; + UINT32 foundMask; + + SYMCRYPT_CHECK_MAGIC( piSrc ); + + // This while loop reveals the value of nUint32, is that OK? + // If so, document why + while( nUint32 > 0 ) + { + // + // Invariant: + // If no nonzero digit has been found, res = 0 and searchingMask = -1.
+ // If a nonzero word has been found: + // msNonzeroWord = most significant nonzero word in Src + // res = index where most-significant nonzero word was found + // searchingMask = 0 + // + + nUint32--; + d = SYMCRYPT_FDEF_INT_PUINT32( piSrc )[nUint32]; + + dIsNonzeroMask = SYMCRYPT_MASK32_NONZERO( d ); + foundMask = dIsNonzeroMask & searchingMask; + res |= nUint32 & foundMask; + msNonzeroWord |= d & foundMask; + searchingMask &= ~foundMask; + } + + // + // If all words are zero, then res == 0 and msNonzeroWord == 0. + // + res = res * 8 * sizeof( UINT32 ) + SymCryptFdefBitsizeOfUint32( msNonzeroWord ); // word index * 32, plus the bits used in that word + + return res; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntSetValueUint32( + UINT32 u32Src, + _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_CHECK_MAGIC( piDst ); + + SymCryptWipe( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_OBJ_NBYTES( piDst ) ); + SYMCRYPT_FDEF_INT_PUINT32( piDst )[0] = u32Src; +} + +C_ASSERT( SYMCRYPT_FDEF_DIGIT_SIZE >= 8 ); // SetValueUint64 below writes words [0] and [1], so a digit must hold at least 8 bytes + +VOID +SYMCRYPT_CALL +SymCryptFdefIntSetValueUint64( + UINT64 u64Src, + _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_CHECK_MAGIC( piDst ); + + SymCryptWipe( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_OBJ_NBYTES( piDst ) ); + SYMCRYPT_FDEF_INT_PUINT32( piDst )[0] = (UINT32) u64Src; + SYMCRYPT_FDEF_INT_PUINT32( piDst )[1] = (UINT32)(u64Src >> 32); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefRawSetValue( + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + _Out_writes_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst, + UINT32 nDigits ) +{ + SYMCRYPT_ERROR scError; + UINT32 b; + INT32 step; + UINT32 w; + UINT32 windex; + UINT32 i; + UINT32 nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + + // + // This is a very simple and slow generic implementation; + // We'll create optimized versions for specific CPU platforms + // (e.g. use of memcpy) + // + + // I assume the number format is public?
+ switch( format ) + { + case SYMCRYPT_NUMBER_FORMAT_LSB_FIRST: + step = 1; + break; + case SYMCRYPT_NUMBER_FORMAT_MSB_FIRST: + step = -1; + pbSrc += cbSrc; // avoid tripping pointer overflow sanitizer with cbSrc == 0 + pbSrc--; // now at the last input byte, which is the least significant one in this format + break; + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + for( windex = 0; windex < nWords; windex++ ) + { + w = 0; + for( i=0; i<4; i++ ) + { + // read the next byte into b + if( cbSrc > 0 ) + { + b = *pbSrc; + cbSrc -= 1; + pbSrc += step; + w |= b << 8*i; + } + } + pDst[windex] = w; // input bytes that are missing leave zero bits in w + } + + // Inspect any remaining input bytes: they must all be zero for the value to fit + b = 0; + while( cbSrc > 0 ) + { + b |= *pbSrc; + pbSrc += step; + cbSrc -= 1; + } + + if( b > 0 ) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; // note: pDst has already been written with the low-order words + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefIntSetValue( + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_ERROR scError; + + SYMCRYPT_CHECK_MAGIC( piDst ); + + scError = SymCryptFdefRawSetValue( pbSrc, cbSrc, format, SYMCRYPT_FDEF_INT_PUINT32( piDst ), piDst->nDigits ); + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefRawGetValue( + _In_reads_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_bytes_(cbDst) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format ) +{ + SYMCRYPT_ERROR scError; + UINT32 b; + INT32 step; + UINT32 w; + UINT32 windex; + UINT32 i; + UINT32 nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + + // + // This is a very simple and slow generic implementation; + // We'll create optimized versions for specific CPU platforms + // (e.g.
use of memcpy) + // + + switch( format ) + { + case SYMCRYPT_NUMBER_FORMAT_LSB_FIRST: + step = 1; + break; + case SYMCRYPT_NUMBER_FORMAT_MSB_FIRST: + step = -1; + pbDst += cbDst; // avoid tripping pointer overflow sanitizer with cbDst == 0 + pbDst--; // now at the last output byte, which receives the least significant byte + break; + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + for( windex = 0; windex < nWords; windex++ ) + { + w = pSrc[windex]; + for( i=0; i<4; i++ ) + { + b = w & 0xff; + w >>= 8; + + // write the next byte + if( cbDst > 0 ) + { + *pbDst = (BYTE)b; + cbDst -= 1; + pbDst += step; + } else { + if( b != 0 ) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; // a nonzero byte did not fit; the output buffer has already been filled + goto cleanup; + } + } + } + } + + // Zero any remaining output bytes + while( cbDst > 0 ) + { + *pbDst = 0; + pbDst += step; + cbDst -= 1; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefIntGetValue( + _In_ PCSYMCRYPT_INT piSrc, + _Out_writes_bytes_(cbDst) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format ) +{ + SYMCRYPT_ERROR scError; + + SYMCRYPT_CHECK_MAGIC( piSrc ); + + scError = SymCryptFdefRawGetValue( &SYMCRYPT_FDEF_INT_PUINT32( piSrc )[0], piSrc->nDigits, pbDst, cbDst, format ); + + return scError; +} + + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntGetValueLsbits32( _In_ PCSYMCRYPT_INT piSrc ) +{ + // nDigits cannot be zero, so we don't have to test + return SYMCRYPT_FDEF_INT_PUINT32( piSrc )[0]; +} + +UINT64 +SYMCRYPT_CALL +SymCryptFdefIntGetValueLsbits64( _In_ PCSYMCRYPT_INT piSrc ) +{ + // nDigits cannot be zero, so we don't have to test + PCUINT32 p = SYMCRYPT_FDEF_INT_PUINT32( piSrc ); + return ((UINT64)(p[1]) << 32) | p[0]; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawIsEqualUint32( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits, + _In_ UINT32 u32Src2 ) +{ + UINT32 d; + UINT32 nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + + d = pSrc1[0] ^ u32Src2; // d ends up 0 only if word 0 matches and every higher word is zero + for( UINT32 i=1; i<nWords; i++) + { + d |= pSrc1[i]; + } + + return
SYMCRYPT_MASK32_ZERO( d ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntIsEqualUint32( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ UINT32 u32Src2 ) +{ + return SymCryptFdefRawIsEqualUint32( &SYMCRYPT_FDEF_INT_PUINT32( piSrc1 )[0], piSrc1->nDigits, u32Src2 ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntIsEqual( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2 ) +{ + UINT32 d; + UINT32 n1 = SYMCRYPT_OBJ_NUINT32( piSrc1 ); + UINT32 n2 = SYMCRYPT_OBJ_NUINT32( piSrc2 ); + UINT32 i; + UINT32 n; + PCUINT32 pSrc1 = SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ); + PCUINT32 pSrc2 = SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ); + + n = SYMCRYPT_MIN( n1, n2 ); + d = 0; /* ORs together every difference; ends up 0 only when the two values are equal */ + for( i=0; i < n ; i++ ) + { + d |= pSrc1[i] ^ pSrc2[i]; + } + + // i == n1 or i == n2, so at most one of the 2 loops below is ever run + + while( i < n1 ) + { + d |= pSrc1[i]; // extra high words of the longer operand must all be zero + i++; + } + + while( i < n2 ) + { + d |= pSrc2[i]; + i++; + } + + return SYMCRYPT_MASK32_ZERO( d ); // the result is a mask derived from d, not a plain 0/1 flag (presumably all-ones when d == 0 - TODO confirm) +} + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptFdefDivisorAllocate( UINT32 nDigits ) +{ + PVOID p = NULL; + UINT32 cb; + PSYMCRYPT_DIVISOR res = NULL; + + // + // The nDigits requirements are enforced by SymCryptFdefSizeofDivisorFromDigits. Thus + // the result does not overflow and is upper bounded by 2^19.
+ // + cb = SymCryptFdefSizeofDivisorFromDigits( nDigits ); + + if( cb != 0 ) // cb == 0 signals an invalid digit count; p then stays NULL + { + p = SymCryptCallbackAlloc( cb ); + } + + if( p == NULL ) + { + goto cleanup; // invalid digit count or allocation failure: return NULL + } + + res = SymCryptFdefDivisorCreate( p, cb, nDigits ); + +cleanup: + return res; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefSizeofDivisorFromDigits( UINT32 nDigits ) +{ + SYMCRYPT_ASSERT( nDigits != 0 ); + SYMCRYPT_ASSERT( nDigits <= SYMCRYPT_FDEF_UPB_DIGITS ); + + // Ensure we do not overflow the following calculation when provided with invalid inputs + if( nDigits == 0 || nDigits > SYMCRYPT_FDEF_UPB_DIGITS ) + { + return 0; // 0 is the error value for an invalid digit count + } + + return SYMCRYPT_FIELD_OFFSET( SYMCRYPT_DIVISOR, Int ) + SymCryptFdefSizeofIntFromDigits( nDigits ); // offset of the embedded Int, plus the size of that Int +} + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptFdefDivisorCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ) +{ + PSYMCRYPT_DIVISOR pdDiv = NULL; + UINT32 cb = SymCryptSizeofDivisorFromDigits( nDigits ); // NOTE(review): generic name here, while SymCryptFdefIntCreate calls the Fdef size helper directly - confirm this is intended + + SYMCRYPT_ASSERT( cb >= sizeof(SYMCRYPT_DIVISOR) ); + SYMCRYPT_ASSERT( cbBuffer >= cb ); + if( (cb == 0) || (cbBuffer < cb) ) + { + goto cleanup; // return NULL + } + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + pdDiv = (PSYMCRYPT_DIVISOR) pbBuffer; + + pdDiv->type = 'gD' << 16; // multi-character constant used as the object type tag + pdDiv->nDigits = nDigits; + + // + // The nDigits requirements are enforced by SymCryptFdefSizeofDivisorFromDigits. Thus + // the result does not overflow and is upper bounded by 2^19.
+ // + pdDiv->cbSize = cb; + + SYMCRYPT_SET_MAGIC( pdDiv ); + + SymCryptIntCreate( (PBYTE)&pdDiv->Int, cbBuffer - SYMCRYPT_FIELD_OFFSET( SYMCRYPT_DIVISOR, Int ), nDigits ); // sets up the embedded Int in place; the return value is not checked + +cleanup: + return pdDiv; +} + +VOID +SymCryptFdefDivisorCopyFixup( + _In_ PCSYMCRYPT_DIVISOR pdSrc, + _Out_ PSYMCRYPT_DIVISOR pdDst ) +{ + UNREFERENCED_PARAMETER( pdSrc ); + UNREFERENCED_PARAMETER( pdDst ); + + SymCryptFdefIntCopyFixup( &pdSrc->Int, &pdDst->Int ); + + SYMCRYPT_SET_MAGIC( pdDst ); +} + +VOID +SymCryptFdefDivisorCopy( + _In_ PCSYMCRYPT_DIVISOR pdSrc, + _Out_ PSYMCRYPT_DIVISOR pdDst ) +{ + SYMCRYPT_CHECK_MAGIC( pdSrc ); + SYMCRYPT_CHECK_MAGIC( pdDst ); + + SYMCRYPT_ASSERT( pdSrc->nDigits == pdDst->nDigits ); + + // in-place copy is somewhat common, and addresses are always public, so we can test for a no-op copy. + if( pdSrc != pdDst ) + { + memcpy( pdDst, pdSrc, pdDst->cbSize ); // copies the whole object, header included; the fixup below re-sets the magic values + + SymCryptFdefDivisorCopyFixup( pdSrc, pdDst ); + } +} + + +VOID +SYMCRYPT_CALL +SymCryptFdefClaimScratch( PBYTE pbScratch, SIZE_T cbScratch, SIZE_T cbMin ) +{ +#if SYMCRYPT_DEBUG + SYMCRYPT_ASSERT( cbScratch >= cbMin ); + SymCryptWipe( pbScratch, cbMin ); // SYMCRYPT_DEBUG builds only: wipe the claimed scratch bytes +#else + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); + UNREFERENCED_PARAMETER( cbMin ); +#endif +} + +UINT32 +SymCryptTestTrialdivisionMaxSmallPrime( + _In_ PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext ) +{ + return pContext->maxTrialPrime; +} + +UINT64 +SymCryptInverseMod2e64( UINT64 m ) +{ + // Compute the inv64 value such that inv64 * m = 1 mod 2^64 for odd m. + // If m is even, there exists no inverse; this function will return a + // useless value in constant time.
+ // + // We use Newton's method to search for a zero of f(x) := x^-1 - m, working modulo 2^64. + // We get the iteration formula + // x_{i+1} = x_i - f(x_i)/f'(x_i) + // = x_i - (x_i^-1 - m)/(-x_i^-2) + // = x_i + x_i^2(1/x_i - m) + // = x_i + x_i - (x_i^2 * m) + // = x_i (2 - x_i*m) + // + // Let x_i = d + 2^n * e where d = inv64 = m^-1 mod 2^64, and 2^n * e is the error term that is zero in the n least + // significant bits. We have + // x_{i+1} = (d + 2^n * e) (2 - (d + 2^n * e) * m) + // = (d + 2^n * e) (2 - d*m - 2^n * e * m) + // = (d + 2^n * e) (2 - 1 - 2^n * e * m) + // = (d + 2^n * e) (1 - 2^n * e * m) + // = d - (2^n * e * (d*m)) + (2^n * e) - (2^{2n} * e^2 * m) + // = d - (2^{2n} * e^2 * m) + // In other words, the error has been squared and multiplied by -m. In our case, working modulo 2^64, the number of correct bits + // on the least significant side is doubled. + // + // To get a 4-bit correct estimate for m^-1 given odd m, we consider the least significant 4 bits of m and inv: + // m = ... m_3 m_2 m_1 m_0 + // inv = ... i_3 i_2 i_1 i_0 + // We want to directly compute i_[3..0] s.t. (m*inv) & 0xf == 1 + // working through some simple simultaneous equations it is easily shown that: + // i_0 = m_0 = 1 + // i_1 = m_1 + // i_2 = m_2 + // i_3 = m_1 ^ m_2 ^ m_3 + // Once we have 4 correct bits, we can double that multiple times using Newton's method. + // + // We use 32-bit operations for most of the iterations for speed on 32-bit platforms.
+ // + UINT32 inv32; + UINT64 inv64; + UINT32 m32; + + m32 = (UINT32)m; + + inv32 = m32 ^ (((m32 - 1) * 0x6) & 0x8); // sets inv32 bits [3..0] + SYMCRYPT_ASSERT( ((m&1) == 0) || (((inv32 * m32) & 0xf) == 1) ); + + inv32 = inv32 * (2 - inv32 * m32 ); + SYMCRYPT_ASSERT( ((m&1) == 0) || (((inv32 * m32) & 0xff) == 1) ); + + inv32 = inv32 * (2 - inv32 * m32 ); + SYMCRYPT_ASSERT( ((m&1) == 0) || (((inv32 * m32) & 0xffff) == 1) ); + + inv32 = inv32 * (2 - inv32 * m32 ); + SYMCRYPT_ASSERT( ((m&1) == 0) || ((inv32 * m32) == 1) ); + + inv64 = inv32; + inv64 = inv64 * (2 - inv64 * m ); + SYMCRYPT_ASSERT( ((m&1) == 0) || ((inv64 * m) == 1) ); + + return inv64; +} + + +VOID +SYMCRYPT_CALL +SymCryptFdefInitTrialdivisionPrime( + UINT32 prime, + _Out_ PSYMCRYPT_TRIALDIVISION_PRIME pPrime ) +{ + // Compute the inverse of the prime mod 2^64 + pPrime->invMod2e64 = SymCryptInverseMod2e64( prime ); + pPrime->compareLimit = ((UINT64) -1) / prime; +} + +FORCEINLINE +UINT32 +SymCryptIsMultipleOfSmallPrime( UINT64 value, PCSYMCRYPT_TRIALDIVISION_PRIME pPrime ) +{ + return (value * pPrime->invMod2e64) <= pPrime->compareLimit; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefInitTrialDivisionGroup( PSYMCRYPT_TRIALDIVISION_GROUP pGroup, UINT32 nPrimes, UINT32 primeProd ) +{ + UINT32 f; + UINT32 r; + UINT32 i; + + pGroup->nPrimes = nPrimes; + + // These % operations are expensive; maybe we can optimize this further. + // In assembler we can do the UINT64 % UINT32 -> UINT32 + // hopefully the compiler is smart enough... + + f = (UINT32) (((UINT64)1 << 32) % primeProd); + pGroup->factor[0] = f; + r = f; + for( i=1; i<9; i++ ) + { + r = (UINT32) (SYMCRYPT_MUL32x32TO64( r, f ) % primeProd); + pGroup->factor[i] = r; + } +} + +UINT32 +SYMCRYPT_CALL +SymCryptGenerateSmallPrimes( UINT32 maxPrime, PUINT32 * ppList ) +{ + // returns a list of small primes, excluding 2, 3, 5, and 17. 
+ UINT32 nPrimes = 0; + PUINT32 pList = NULL; + + // pSieve[i] corresponds to 2*i+1 + // value X is in location X/2 + UINT32 nSieve; + PBYTE pSieve; + + UINT32 pi; + UINT32 p; + UINT32 si; + UINT32 i; + + maxPrime = SYMCRYPT_MAX( maxPrime, 32 ); // simplify error handling by always producing primes at least up to 32 + maxPrime = SYMCRYPT_MIN( maxPrime, 1 << 24 ); // Limit prime list to something sane (sieve = 8 MB, list = 4 MB or so). + + // highest index is (maxPrime - 1)/2 which encodes maxPrime if odd, or maxPrime-1 if even + nSieve = (maxPrime - 1) / 2 + 1; + + pSieve = SymCryptCallbackAlloc( nSieve ); + if( pSieve == NULL ) + { + goto cleanup; + } + + SymCryptWipe( pSieve, nSieve ); + + + pi = 1; // index of first prime 3 + p = 2*pi + 1; // prime value + for(;;) + { + si = 2*(pi*pi + pi); // index of p^2 + if( si > nSieve ) + { + break; // We're done sieving + } + while( si < nSieve ) + { + pSieve[si] = 1; + si += p; + } + // Search for the next prime + do { + pi += 1; + } while( pSieve[pi] != 0 ); + p = 2*pi + 1; + } + + // Eliminate 3, 5, and 17 + pSieve[1] = 1; + pSieve[2] = 1; + pSieve[8] = 1; + + for( i=1; i<nSieve; i++ ) + { + nPrimes += 1 - pSieve[i]; + } + + // dcl - I suspect that this is not a problem, but please document + // why this multiplication cannot overflow. I assume there is a practical limit on nPrimes, but unsure + // what that would be. 
+ pList = SymCryptCallbackAlloc( nPrimes * sizeof( UINT32 ) ); + if( pList == NULL ) + { + goto cleanup; + } + + pi = 0; + for( i=1; i<nSieve; i++ ) + { + if( pSieve[i] == 0 ) + { + pList[pi++] = 2*i+1; + } + } + + SYMCRYPT_ASSERT( pi == nPrimes ); + +cleanup: + if( pSieve != NULL ) + { + SymCryptWipe( pSieve, nSieve ); + SymCryptCallbackFree( pSieve ); + } + + *ppList = pList; + return nPrimes; +} + + +PCSYMCRYPT_TRIALDIVISION_CONTEXT +SYMCRYPT_CALL +SymCryptFdefCreateTrialDivisionContext( UINT32 nDigits ) +{ + PSYMCRYPT_TRIALDIVISION_CONTEXT pRes = NULL; + PBYTE pAlloc; + UINT32 nBytes; + UINT32 iPrime; + UINT32 iGroup; + UINT32 nPrimes; + UINT32 nGroups; + UINT32 M; + UINT32 iGroupSpec; + UINT32 i; + UINT32 j; + UINT64 cRabinMillerCost; + UINT64 cPerPrimeCost; + UINT64 tmp64; + UINT32 maxPrime; + UINT32 minPrime; + UINT32 nSmallPrimes = 0; + UINT32 n; + UINT32 nP; + UINT32 nG; + PUINT32 pSmallPrimeList = NULL; + + // First we estimate the largest prime we will do trial division with + // Inputs: + // - cycles/digit of reduction per group of primes + // - cycles/prime of divide test + // - cycles per digit^3 for a Rabin-Miller test + // We optimize in this model, which is pretty accurate for large inputs but underestimates the RM cost + // for smaller sizes. + + // Compute the Rabin-Miller cost estimate. We reduce it by 20% because our cost model does not take + // into account some of the trial-division cost such as memory footprint, cache pressure, + // setup cost, etc. Reducing the Rabin-Miller cost leads us to do fewer trial divisions to approximately + // balance the hidden costs. 
+ + if( nDigits <= 1000 ) + { + // nDigits is small enough to not have any overflows in this computation + if( nDigits == 0 ) + { + goto cleanup; // return NULL + } + + cRabinMillerCost = (UINT64) nDigits * nDigits * nDigits * (SYMCRYPT_RABINMILLER_DIGIT_CYCLES * 8 / 10); + i = 0; + minPrime = 0; + for(;;) + { + nPrimes = g_SymCryptSmallPrimeGroupsSpec[i].nPrimes; + maxPrime = g_SymCryptSmallPrimeGroupsSpec[i].maxPrime; + nGroups = g_SymCryptSmallPrimeGroupsSpec[i].nGroups; + cPerPrimeCost = (UINT64) nDigits * SYMCRYPT_TRIALDIVISION_DIGIT_REDUCTION_CYCLES / nPrimes + SYMCRYPT_TRIALDIVISION_DIVIDE_TEST_CYCLES; + + // If the last group isn't worth it, we shouldn't go to even fewer primes + if( nGroups == 0 || maxPrime * cPerPrimeCost >= cRabinMillerCost) + { + break; + } + i++; + minPrime = maxPrime; + } + + // Now we know how many primes are in the last groups, let's find out how large the largest prime should be + tmp64 = cRabinMillerCost / cPerPrimeCost; + tmp64 = SYMCRYPT_MIN( tmp64, SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME ); + maxPrime = (UINT32) tmp64; + maxPrime = SYMCRYPT_MAX( maxPrime, minPrime ); // Make sure we don't fall into the previous group size that we don't want + } + else + { + maxPrime = SYMCRYPT_TRIALDIVISION_MAX_SMALL_PRIME; + } + + nSmallPrimes = SymCryptGenerateSmallPrimes( maxPrime, &pSmallPrimeList ); + + // Find out how many groups we'll have, and how many actual primes we'll use + n = nSmallPrimes; + nG = 0; + nP = 0; + i = 0; + for(;;) + { + nPrimes = g_SymCryptSmallPrimeGroupsSpec[i].nPrimes; + nGroups = g_SymCryptSmallPrimeGroupsSpec[i].nGroups; + + if( n < nPrimes * nGroups || nGroups == 0 ) + { + // At the right nPrimes, compute exactly how many groups to add + n = n / nPrimes; + nG += n; + nP += n * nPrimes; + n = 0; // No primes left + break; + } + + // Use up all the groups of this size... 
+ nG += nGroups; + nP += nPrimes * nGroups; + n -= nPrimes * nGroups; + i++; + } + + // dcl - Potential integer overflow + // Need to document sizes, and limits of nG, nP, and confirm + // an overflow is not possible, also recall that size_t varies in size, but nBytes is 32-bit + nBytes = sizeof( SYMCRYPT_TRIALDIVISION_CONTEXT ) + + (nG + 1) * sizeof( SYMCRYPT_TRIALDIVISION_GROUP ) // + 1 for 0 sentinel + + (nP + 1) * sizeof( SYMCRYPT_TRIALDIVISION_PRIME ) // + 1 for 0 sentinel + + (nP + 1) * sizeof( UINT32 ); // + 1 for 0 sentinel + + pAlloc = SymCryptCallbackAlloc( nBytes ); + if( pAlloc == NULL ) + { + goto cleanup; + } + + pRes = (PSYMCRYPT_TRIALDIVISION_CONTEXT) pAlloc; + pAlloc += sizeof( *pRes ); + + pRes->nBytesAlloc = nBytes; + + pRes->pGroupList = (PSYMCRYPT_TRIALDIVISION_GROUP)pAlloc; + pAlloc += (nG + 1) * sizeof( SYMCRYPT_TRIALDIVISION_GROUP ); + + pRes->pPrimeList = (PSYMCRYPT_TRIALDIVISION_PRIME) pAlloc; + pAlloc += (nP + 1) * sizeof( SYMCRYPT_TRIALDIVISION_PRIME ); + + pRes->pPrimes = (PUINT32) pAlloc; + pAlloc += (nP + 1) * sizeof( UINT32 ); + + SYMCRYPT_ASSERT( nBytes == (SIZE_T)(pAlloc - (PBYTE)pRes) ); + + // Initialize the primes 3, 5, and 17 + SymCryptFdefInitTrialdivisionPrime( 3, &pRes->Primes3_5_17[0] ); + SymCryptFdefInitTrialdivisionPrime( 5, &pRes->Primes3_5_17[1] ); + SymCryptFdefInitTrialdivisionPrime( 17, &pRes->Primes3_5_17[2] ); + + memcpy( pRes->pPrimes, pSmallPrimeList, nP * sizeof( UINT32 ) ); + pRes->pPrimes[nP] = 0; + pRes->maxTrialPrime = pRes->pPrimes[nP-1]; + + /* + *** Old code to decrypt the nibble encoding. Keep in case we want it back later... + // Generate the other primes from the difference table. 
+ // We initialize the prime structures, and a list of the primes that is used to compute the group specs + + pNibs = &g_SymCryptSmallPrimeDifferenceNibbles[0]; + + smallPrime = 3; + nPrimes = 0; + while( smallPrime < SYMCRYPT_MAX_SMALL_PRIME ) + { + b = *pNibs++; + nib = b & 0xf; + + if( nib == 0 ) + { + smallPrime += 30; + // No check for termination here as we wouldn't encode a 0 if there wasn't another prime. + } else { + smallPrime += 2*nib; + pRes->pPrimes[nPrimes] = smallPrime; + SymCryptFdefInitTrialdivisionPrime( smallPrime, &pRes->pPrimeList[nPrimes] ); + nPrimes++; + if( smallPrime >= SYMCRYPT_MAX_SMALL_PRIME ) + { + break; + } + } + nib = b >> 4; + if( nib == 0 ) + { + smallPrime += 30; + } else { + smallPrime += 2*nib; + pRes->pPrimes[nPrimes] = smallPrime; + SymCryptFdefInitTrialdivisionPrime( smallPrime, &pRes->pPrimeList[nPrimes] ); + nPrimes++; + } + } + SYMCRYPT_ASSERT( smallPrime == SYMCRYPT_MAX_SMALL_PRIME && nPrimes == SYMCRYPT_N_SMALL_PRIMES_ENCODED ); + */ + + for( iPrime = 0; iPrime < nP; iPrime++ ) + { + SymCryptFdefInitTrialdivisionPrime( pRes->pPrimes[iPrime], &pRes->pPrimeList[iPrime] ); + } + + // Add the trailing 0s + pRes->pPrimeList[nP].invMod2e64 = 0; + pRes->pPrimeList[nP].compareLimit = 0; + + // Make sure we have the 32-bit tables, not the 64-bit ones. + // dcl - warning suppression is not portable. Also, if it is a compile time constant, shouldn't it be a compile assert? 
+#pragma warning( suppress: 4127 ) // conditional expression is constant + SYMCRYPT_ASSERT( SYMCRYPT_MAX_SMALL_PRIME_GROUP_PRODUCT <= (UINT32)-1 ); + + iGroup = 0; + iPrime = 0; + iGroupSpec = 0; + nPrimes = g_SymCryptSmallPrimeGroupsSpec[iGroupSpec].nPrimes; + nGroups = g_SymCryptSmallPrimeGroupsSpec[iGroupSpec].nGroups; + while( iPrime < nP ) + { + if( nGroups == 0 ) + { + iGroupSpec +=1 ; + nPrimes = g_SymCryptSmallPrimeGroupsSpec[iGroupSpec].nPrimes; + nGroups = g_SymCryptSmallPrimeGroupsSpec[iGroupSpec].nGroups; + if( nGroups == 0 ) + { + nGroups = nG - iGroup; + } + } + + SYMCRYPT_ASSERT( iPrime + nPrimes <= nP ); + M = pRes->pPrimes[iPrime++]; + for( j=1; j<nPrimes; j++ ) + { + SYMCRYPT_ASSERT( M <= SYMCRYPT_MAX_SMALL_PRIME_GROUP_PRODUCT / pRes->pPrimes[iPrime] ); + M *= pRes->pPrimes[iPrime++]; + } + SymCryptFdefInitTrialDivisionGroup( &pRes->pGroupList[iGroup], nPrimes, M ); + iGroup++; + + nGroups--; + } + + SYMCRYPT_ASSERT( iPrime == nP && iGroup == nG ); + + // Add the trailing sentinel group + pRes->pGroupList[iGroup].nPrimes = 0; + +cleanup: + if( pSmallPrimeList != NULL ) + { + SymCryptWipe( pSmallPrimeList, nSmallPrimes * sizeof( UINT32 ) ); + SymCryptCallbackFree( pSmallPrimeList ); + pSmallPrimeList = NULL; + } + return pRes; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefFreeTrialDivisionContext( PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext ) +{ + // No security reason to wipe it, but our test code verifies that we wipe everything... 
+ // Perf cost is minor + SymCryptWipe( (PBYTE) pContext, pContext->nBytesAlloc ); + SymCryptCallbackFree( (PSYMCRYPT_TRIALDIVISION_CONTEXT) pContext ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntFindSmallDivisor( + _In_ PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext, + _In_ PCSYMCRYPT_INT piSrc, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PCUINT32 pSrc = SYMCRYPT_FDEF_INT_PUINT32( piSrc ); + PCUINT32 p; + UINT32 nDigits = piSrc->nDigits; + UINT32 nUint32 = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + UINT64 Acc; + PCSYMCRYPT_TRIALDIVISION_GROUP pGroup; + PCSYMCRYPT_TRIALDIVISION_PRIME pPrime; + UINT32 nPrimes; + UINT32 res; + + // Check for 2. Not really needed for prime generation, but it makes the function easier to test/document/describe. + if( (*pSrc & 1) == 0 ) + { + res = 2; + goto cleanup; + } + + // Check the factors 3, 5, 17. These are special as they divide 2^32 - 1 + // (We could also do 257 and 65537 but that doesn't seem worth the added complexity.) + Acc = 0; + p = pSrc; + do { +#if SYMCRYPT_FDEF_DIGIT_SIZE == 16 + Acc = Acc + p[0] + p[1] + p[2] + p[3]; + p += 4; +#elif (SYMCRYPT_FDEF_DIGIT_SIZE % 32) == 0 + Acc = Acc + p[0] + p[1] + p[2] + p[3] + p[4] + p[5] + p[6] + p[7]; + p += 8; +#else + // dcl - ideally, #error would have a descriptive message so it is easily found in code if encountered, same below +#error ?? 
+#endif + } while( p < pSrc + nUint32 ); + + if( SymCryptIsMultipleOfSmallPrime( Acc, &pContext->Primes3_5_17[0] ) ) + { + res = 3; + goto cleanup; + } + + if( SymCryptIsMultipleOfSmallPrime( Acc, &pContext->Primes3_5_17[1] ) ) + { + res = 5; + goto cleanup; + } + + if( SymCryptIsMultipleOfSmallPrime( Acc, &pContext->Primes3_5_17[2] ) ) + { + res = 17; + goto cleanup; + } + + pGroup = pContext->pGroupList; + pPrime = pContext->pPrimeList; + while( (nPrimes = pGroup->nPrimes) != 0 ) + { + // Reduce Src modulo the group product to a 64-bit value + Acc = 0; + p = pSrc + nUint32; + +#if SYMCRYPT_FDEF_DIGIT_SIZE == 16 + if( (nUint32 & 4) != 0 ) + { + // nUInt32 is 4 mod 8, process the top 4 words only + p -= 4; + Acc = + p[0] + + SYMCRYPT_MUL32x32TO64( p[1], pGroup->factor[0] ) + + SYMCRYPT_MUL32x32TO64( p[2], pGroup->factor[1] ) + + SYMCRYPT_MUL32x32TO64( p[3], pGroup->factor[2] ); + } else { + // Process 8 words to start + p -= 8; + Acc = + p[0] + + SYMCRYPT_MUL32x32TO64( p[1], pGroup->factor[0] ) + + SYMCRYPT_MUL32x32TO64( p[2], pGroup->factor[1] ) + + SYMCRYPT_MUL32x32TO64( p[3], pGroup->factor[2] ) + + SYMCRYPT_MUL32x32TO64( p[4], pGroup->factor[3] ) + + SYMCRYPT_MUL32x32TO64( p[5], pGroup->factor[4] ) + + SYMCRYPT_MUL32x32TO64( p[6], pGroup->factor[5] ) + + SYMCRYPT_MUL32x32TO64( p[7], pGroup->factor[6] ); + } +#elif (SYMCRYPT_FDEF_DIGIT_SIZE % 32) == 0 + + p -= 8; + Acc = + p[0] + + SYMCRYPT_MUL32x32TO64( p[1], pGroup->factor[0] ) + + SYMCRYPT_MUL32x32TO64( p[2], pGroup->factor[1] ) + + SYMCRYPT_MUL32x32TO64( p[3], pGroup->factor[2] ) + + SYMCRYPT_MUL32x32TO64( p[4], pGroup->factor[3] ) + + SYMCRYPT_MUL32x32TO64( p[5], pGroup->factor[4] ) + + SYMCRYPT_MUL32x32TO64( p[6], pGroup->factor[5] ) + + SYMCRYPT_MUL32x32TO64( p[7], pGroup->factor[6] ); + +#else +#error ?? 
+#endif + while( p > pSrc ) + { + p -= 8; + Acc = + p[0] + + SYMCRYPT_MUL32x32TO64( p[1], pGroup->factor[0] ) + + SYMCRYPT_MUL32x32TO64( p[2], pGroup->factor[1] ) + + SYMCRYPT_MUL32x32TO64( p[3], pGroup->factor[2] ) + + SYMCRYPT_MUL32x32TO64( p[4], pGroup->factor[3] ) + + SYMCRYPT_MUL32x32TO64( p[5], pGroup->factor[4] ) + + SYMCRYPT_MUL32x32TO64( p[6], pGroup->factor[5] ) + + SYMCRYPT_MUL32x32TO64( p[7], pGroup->factor[6] ) + + SYMCRYPT_MUL32x32TO64( (UINT32) Acc , pGroup->factor[7] ) + + SYMCRYPT_MUL32x32TO64( (UINT32)(Acc >> 32), pGroup->factor[8] ); + } + + // Now we check whether we have a multiple of one of the primes + while( nPrimes > 0 ) + { + if( SymCryptIsMultipleOfSmallPrime( Acc, pPrime ) ) + { + res = pContext->pPrimes[ (pPrime - pContext->pPrimeList) ]; // pointer subtraction auto-divides by size... + goto cleanup; + } + pPrime++; + nPrimes--; + } + + pGroup++; + } + + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); + + // Did not find a small factor, return zero + res = 0; + +cleanup: + return res; +} + +/* Wine hack: asm not supported yet */ + +VOID +SYMCRYPT_CALL +SymCryptFdefMaskedCopyAsm( + _In_reads_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PCBYTE pbSrc, + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbDst, + UINT32 nDigits, + UINT32 mask ) +{ + SymCryptFdefMaskedCopyC( pbSrc, pbDst, nDigits, mask ); +} diff --git a/libs/symcrypt/lib/fdef_int.c b/libs/symcrypt/lib/fdef_int.c new file mode 100644 index 00000000000..ba50e184802 --- /dev/null +++ b/libs/symcrypt/lib/fdef_int.c @@ -0,0 +1,1321 @@ +// +// fdef_int.c INT functions for default number format +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// Default big-number format: +// INT objects are stored in two parts: +// a SYMCRYPT_FDEF_INT structure +// an array of UINT32; the # elements in the array is a multiple of SYMCRYPT_FDEF_DIGIT_SIZE/4. 
+// +// The pointer passed points to the start of the UINT32 array, just after the SYMCRYPT_FDEF_INT structure. +// +// The generic implementation accesses the digits as an array of UINT32, but on 64-bit CPUs +// the code can also view it as an array of UINT64. +// + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawAddC( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ) +{ + UINT32 i; + UINT64 t; + + t = 0; + for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + t = t + pSrc1[i] + pSrc2[i]; + pDst[i] = (UINT32) t; + t >>= 32; + } + + return (UINT32) t; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawAdd( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ) +{ +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_ARM + return SymCryptFdefRawAddAsm( pSrc1, pSrc2, pDst, nDigits ); +#else + return SymCryptFdefRawAddC( pSrc1, pSrc2, pDst, nDigits ); +#endif +} + + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawAddUint32( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src1, + UINT32 Src2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 Dst, + UINT32 nDigits ) +{ + UINT32 i; + UINT64 t; + + t = Src2; + for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + t = t + Src1[i]; + Dst[i] = (UINT32) t; + t >>= 32; + } + + return (UINT32) t; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntAddUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 u32Src2, + _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_CHECK_MAGIC( piSrc1 ); + SYMCRYPT_CHECK_MAGIC( piDst ); + + SYMCRYPT_ASSERT( piSrc1->nDigits == piDst->nDigits ); + + return SymCryptFdefRawAddUint32( SYMCRYPT_FDEF_INT_PUINT32( 
piSrc1 ), u32Src2, SYMCRYPT_FDEF_INT_PUINT32( piDst ), piDst->nDigits ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntAddSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_ASSERT( piSrc1->nDigits == piSrc2->nDigits && piSrc2->nDigits == piDst->nDigits ); + + return SymCryptFdefRawAdd( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), + SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), + SYMCRYPT_FDEF_INT_PUINT32( piDst ), + piDst->nDigits ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntAddMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ) +{ + UINT32 nS1 = piSrc1->nDigits; + UINT32 nS2 = piSrc2->nDigits; + UINT32 nD = piDst->nDigits; + UINT32 c; + UINT32 nW; + + SYMCRYPT_ASSERT( nD >= nS1 && nD >= nS2 ); + + if( nS1 < nS2 ) + { + c = SymCryptFdefRawAdd( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), SYMCRYPT_FDEF_INT_PUINT32( piDst ), nS1 ); + c = SymCryptFdefRawAddUint32( &SYMCRYPT_FDEF_INT_PUINT32( piSrc2 )[nS1 * SYMCRYPT_FDEF_DIGIT_NUINT32], c, &SYMCRYPT_FDEF_INT_PUINT32( piDst )[nS1 * SYMCRYPT_FDEF_DIGIT_NUINT32], nS2 - nS1 ); + nW = nS2; + } else { + // nS2 < nS1 + c = SymCryptFdefRawAdd( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), SYMCRYPT_FDEF_INT_PUINT32( piDst ), nS2 ); + c = SymCryptFdefRawAddUint32( &SYMCRYPT_FDEF_INT_PUINT32( piSrc1 )[nS2 * SYMCRYPT_FDEF_DIGIT_NUINT32], c, &SYMCRYPT_FDEF_INT_PUINT32( piDst )[nS2 * SYMCRYPT_FDEF_DIGIT_NUINT32], nS1 - nS2 ); + nW = nS1; + } + + if( nW < nD ) + { + SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piDst )[nW * SYMCRYPT_FDEF_DIGIT_NUINT32], (nD - nW) * SYMCRYPT_FDEF_DIGIT_SIZE ); + SYMCRYPT_FDEF_INT_PUINT32( piDst )[nW * SYMCRYPT_FDEF_DIGIT_NUINT32] = c; + c = 0; + } + + return c; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawSubC( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, + 
_Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ) +{ + UINT32 i; + UINT64 t; + UINT32 c; + + c = 0; + for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + // c == 1 for carry, 0 for no carry + t = (UINT64) pSrc1[i] - pSrc2[i] - c; + pDst[i] = (UINT32) t; + c = (UINT32)(t >> 32) & 1; + } + + return c; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawSub( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ) +{ +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_ARM + return SymCryptFdefRawSubAsm( pSrc1, pSrc2, pDst, nDigits ); +#else + return SymCryptFdefRawSubC( pSrc1, pSrc2, pDst, nDigits ); +#endif +} + + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawSubUint32( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + UINT32 Src2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ) +{ + UINT32 i; + UINT64 t; + UINT32 c; + + c = Src2; + for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + t = (UINT64)pSrc1[i] - c; + pDst[i] = (UINT32) t; + c = (UINT32)(t >> 32) & 1; + } + + return c; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawNeg( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + UINT32 carryIn, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ) +{ + UINT32 i; + UINT64 t; + UINT32 c; + + c = carryIn; + for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + t = (UINT64)0 - pSrc1[i] - c; + pDst[i] = (UINT32) t; + c = (UINT32)(t >> 32) & 1; + } + + return c; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntSubUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 u32Src2, + _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_ASSERT( piSrc1->nDigits == piDst->nDigits ); + + return SymCryptFdefRawSubUint32( 
SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), u32Src2, SYMCRYPT_FDEF_INT_PUINT32( piDst ), piDst->nDigits ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntSubSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_ASSERT( piSrc1->nDigits == piSrc2->nDigits && piSrc1->nDigits == piDst->nDigits ); + + return SymCryptFdefRawSub( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), SYMCRYPT_FDEF_INT_PUINT32( piDst ), piDst->nDigits ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntSubMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ) +{ + UINT32 nS1 = piSrc1->nDigits; + UINT32 nS2 = piSrc2->nDigits; + UINT32 nD = piDst->nDigits; + UINT32 c; + UINT32 n; + + SYMCRYPT_ASSERT( nD >= nS1 && nD >= nS2 ); + + if( nS1 < nS2 ) + { + c = SymCryptFdefRawSub( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), SYMCRYPT_FDEF_INT_PUINT32( piDst ), nS1 ); + c = SymCryptFdefRawNeg( &SYMCRYPT_FDEF_INT_PUINT32( piSrc2 )[nS1 * SYMCRYPT_FDEF_DIGIT_NUINT32], c, &SYMCRYPT_FDEF_INT_PUINT32( piDst )[nS1 * SYMCRYPT_FDEF_DIGIT_NUINT32], nS2 - nS1 ); + n = nS2 * SYMCRYPT_FDEF_DIGIT_NUINT32; + } else { + // nS2 < nS1 + c = SymCryptFdefRawSub( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), SYMCRYPT_FDEF_INT_PUINT32( piDst ), nS2 ); + c = SymCryptFdefRawSubUint32( &SYMCRYPT_FDEF_INT_PUINT32( piSrc1 )[nS2 * SYMCRYPT_FDEF_DIGIT_NUINT32], c, &SYMCRYPT_FDEF_INT_PUINT32( piDst )[nS2 * SYMCRYPT_FDEF_DIGIT_NUINT32], nS1 - nS2 ); + n = nS1 * SYMCRYPT_FDEF_DIGIT_NUINT32; + } + + // + // Set the rest of the result to 0s or 1s + // + while( n < nD * SYMCRYPT_FDEF_DIGIT_NUINT32 ) + { + SYMCRYPT_FDEF_INT_PUINT32( piDst )[n++] = 0 - c; + } + + return c; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawIsLessThanC( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, 
+ UINT32 nDigits ) +{ + UINT32 i; + UINT64 t; + UINT32 c; + + // We just do a subtraction without writing and return the carry + c = 0; + for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + // c == 1 for carry, 0 for no carry + t = (UINT64) pSrc1[i] - pSrc2[i] - c; + c = (UINT32)(t >> 32) & 1; + } + + // All booleans are returned as masks + return 0 - c; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawIsLessThan( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, + UINT32 nDigits ) +{ +#if 0 & SYMCRYPT_CPU_AMD64 +// return SymCryptFdefRawIsLessThanAsm( pSrc1, pSrc2, nDigits ); +#else + return SymCryptFdefRawIsLessThanC( pSrc1, pSrc2, nDigits ); +#endif +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawIsZeroC( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + UINT32 nDigits ) +{ + UINT32 i; + UINT32 c; + + c = 0; + for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + c |= pSrc1[i]; + } + + // All booleans are returned as masks + return SYMCRYPT_MASK32_ZERO( c ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawIsZero( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + UINT32 nDigits ) +{ +#if 0 & SYMCRYPT_CPU_AMD64 +// return SymCryptFdefRawIsZeroAsm( pSrc1, nDigits ); +#else + return SymCryptFdefRawIsZeroC( pSrc1, nDigits ); +#endif +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntIsLessThan( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2 ) +{ + UINT32 nD1 = piSrc1->nDigits; + UINT32 nD2 = piSrc2->nDigits; + + UINT32 res; + + if( nD1 == nD2 ) + { + res = SymCryptFdefRawIsLessThan( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), nD1 ); + } else if( nD1 < nD2 ) { + res = SymCryptFdefRawIsLessThan( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), nD1 ); + res |= ~SymCryptFdefRawIsZero( &SYMCRYPT_FDEF_INT_PUINT32( piSrc2 )[ nD1 * SYMCRYPT_FDEF_DIGIT_NUINT32 ], nD2 - 
nD1 ); + } else { + res = SymCryptFdefRawIsLessThan( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), nD2 ); + res &= SymCryptFdefRawIsZero( &SYMCRYPT_FDEF_INT_PUINT32( piSrc1 )[ nD2 * SYMCRYPT_FDEF_DIGIT_NUINT32 ], nD1 - nD2 ); + } + + return res; +} + + +VOID +SYMCRYPT_CALL +SymCryptFdefIntNeg( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ) +{ + UINT32 nDigits = piDst->nDigits; + SYMCRYPT_ASSERT( piSrc->nDigits == nDigits ); + + SymCryptFdefRawNeg( SYMCRYPT_FDEF_INT_PUINT32( piSrc ), 0, SYMCRYPT_FDEF_INT_PUINT32( piDst ), nDigits ); +} + + +VOID +SYMCRYPT_CALL +SymCryptFdefIntMulPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T Exp, + _Out_ PSYMCRYPT_INT piDst ) +{ + SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits ); + + SIZE_T shiftWords = Exp / (8 * sizeof( UINT32 ) ); + SIZE_T shiftBits = Exp % (8 * sizeof( UINT32 ) ); + + UINT32 nWords = piDst->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + + if( shiftWords >= nWords ) + { + SymCryptWipe( SYMCRYPT_FDEF_INT_PUINT32( piDst ), nWords * sizeof( UINT32 ) ); + goto cleanup; + } + + SIZE_T i = nWords; + while( i > shiftWords ) + { + i--; + UINT64 t = (UINT64)SYMCRYPT_FDEF_INT_PUINT32( piSrc )[i - shiftWords] << 32; + if( i > shiftWords ) + { + t |= SYMCRYPT_FDEF_INT_PUINT32( piSrc )[i - shiftWords - 1]; + } + SYMCRYPT_FDEF_INT_PUINT32( piDst )[i] = (UINT32)(t >> (32 - shiftBits)); + } + + while( i > 0 ) + { + i--; + SYMCRYPT_FDEF_INT_PUINT32( piDst )[i] = 0; + } + +cleanup: + ; +} + +// In shift-based operations which we have no assembly for, and we'd like to use 32-bit words +// on 32-bit architectures and 64-bit words on 64-bit architectures. So we use NATIVE_UINT & +// friends. + +// Note that accessing the FDEF uint32 array as an array of NATIVE_UINTs relies on +// the little-endianness of the target if NATIVE_UINT is larger than 32 bits. 
+// AMD64 is little endian and ARM64 code is always expected to execute in little +// endian mode, but this is not true in general for an arbitrary 64 bit platform. +// +// If we need to support a 64 bit big endian platform, we need to either +// restrict its NATIVE_UINT to 32 bits, or introduce load and store macros. +#define SYMCRYPT_FDEF_INT_PNATIVE_UINT(p) ((NATIVE_UINT*) SYMCRYPT_FDEF_INT_PUINT32( p )) +// Ensure that sizeof(NATIVE_UINT) > 4 only when compiling for known little endian target +C_ASSERT( (NATIVE_BYTES <= 4) || SYMCRYPT_CPU_AMD64 || SYMCRYPT_CPU_ARM64 ); + +#define SYMCRYPT_FDEF_DIGIT_NNATIVE_UINT ((NATIVE_UINT)(SYMCRYPT_FDEF_DIGIT_SIZE / NATIVE_BYTES)) + +// Ensure that digit is divisible by native word size! +C_ASSERT(SYMCRYPT_FDEF_DIGIT_NNATIVE_UINT * NATIVE_BYTES == SYMCRYPT_FDEF_DIGIT_SIZE); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntDivPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ) +{ + SIZE_T shiftWords = exp / NATIVE_BITS; + SIZE_T shiftRightBits = exp % NATIVE_BITS; + SIZE_T shiftLeftBits = (NATIVE_BITS-1) - shiftRightBits; + NATIVE_UINT lowWord, highWord, highPart; + SIZE_T i = 0; + + NATIVE_UINT nWords = piDst->nDigits * SYMCRYPT_FDEF_DIGIT_NNATIVE_UINT; + + SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits ); + + shiftWords = SYMCRYPT_MIN(shiftWords, nWords); + if( shiftWords < nWords ) + { + lowWord = SYMCRYPT_FDEF_INT_PNATIVE_UINT(piSrc)[shiftWords]; + while( i+shiftWords+1 < nWords ) + { + highWord = SYMCRYPT_FDEF_INT_PNATIVE_UINT(piSrc)[i+shiftWords+1]; + + // We always shift highWord left by 1 to keep variable shiftLeftBits in range [0,NATIVE_BITS-1] + highPart = (highWord << shiftLeftBits)<<1; + + SYMCRYPT_FDEF_INT_PNATIVE_UINT(piDst)[i] = (lowWord >> shiftRightBits) | highPart; + + lowWord = highWord; + i++; + } + SYMCRYPT_FDEF_INT_PNATIVE_UINT(piDst)[i] = (lowWord >> shiftRightBits); + i++; + } + + SYMCRYPT_ASSERT(i + shiftWords == nWords); + + SymCryptWipe( &SYMCRYPT_FDEF_INT_PNATIVE_UINT( piDst 
)[nWords-shiftWords], shiftWords * NATIVE_BYTES ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntShr1( + UINT32 highestBit, + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ) +{ + UINT32 nWords = piDst->nDigits * SYMCRYPT_FDEF_DIGIT_NNATIVE_UINT; + + SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits ); + SYMCRYPT_ASSERT( highestBit < 2 ); + + SIZE_T i = 0; + NATIVE_UINT lowWord = SYMCRYPT_FDEF_INT_PNATIVE_UINT(piSrc)[0]; + NATIVE_UINT highWord = 0; + while( i+1 < nWords ) + { + highWord = SYMCRYPT_FDEF_INT_PNATIVE_UINT(piSrc)[i+1]; + + SYMCRYPT_FDEF_INT_PNATIVE_UINT(piDst)[i] = (lowWord >> 1) | (highWord << (NATIVE_BITS - 1)); + + lowWord = highWord; + i++; + } + + SYMCRYPT_FDEF_INT_PNATIVE_UINT(piDst)[i] = (lowWord >> 1) | ((NATIVE_UINT)highestBit) << (NATIVE_BITS - 1); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntModPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ) +{ + SIZE_T expWords = exp / 32; // index of word with the partial mask + SIZE_T expBits = exp % 32; // # bits to leave in that word + + UINT32 nWords = piDst->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + + SYMCRYPT_ASSERT( piSrc->nDigits == piDst->nDigits ); + + if( piSrc != piDst ) + { + memcpy( SYMCRYPT_FDEF_INT_PUINT32( piDst ), SYMCRYPT_FDEF_INT_PUINT32( piSrc ), nWords * sizeof( UINT32 ) ); + } + + if( expWords >= nWords ) + { + // exp is so large that Dst = Src is sufficient. 
+ goto cleanup; + } + + for( SIZE_T i=expWords + 1; i < nWords; i++ ) + { + SYMCRYPT_FDEF_INT_PUINT32( piDst )[i] = 0; + } + + if( expBits != 0 ) + { + SYMCRYPT_FDEF_INT_PUINT32( piDst )[expWords] &= ((UINT32) -1) >> (32 - expBits ); + } else { + SYMCRYPT_FDEF_INT_PUINT32( piDst )[expWords] = 0; + } + +cleanup: + ; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntGetBit( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 iBit ) +{ + SYMCRYPT_ASSERT( iBit < piSrc->nDigits * SYMCRYPT_FDEF_DIGIT_BITS ); + + return (((SYMCRYPT_FDEF_INT_PUINT32( piSrc)[iBit / 32]) >> (iBit % 32)) & 1); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntGetBits( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 iBit, + UINT32 nBits ) +{ + UINT32 mainMask = 0; + UINT32 result = 0; + + SYMCRYPT_ASSERT( (nBits > 0) && + (nBits < 33) && + (iBit < piSrc->nDigits * SYMCRYPT_FDEF_DIGIT_BITS) && + (iBit + nBits <= piSrc->nDigits * SYMCRYPT_FDEF_DIGIT_BITS) ); + + mainMask = (UINT32)(-1) >> (32-nBits); + + // Get the lower word first (it exists since iBit is smaller than the max bit) + result = SYMCRYPT_FDEF_INT_PUINT32(piSrc)[iBit/32]; + + // Shift to the right accordingly + result >>= (iBit%32); + + // Get the upper word (if we need it) + // Note: the iBit and nBits values are public + if ((iBit%32!=0) && ( iBit/32 + 1 < piSrc->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 )) + { + result |= ( SYMCRYPT_FDEF_INT_PUINT32(piSrc)[iBit/32+1] << (32 - iBit%32) ); + } + + // Mask out the top bits + result &= mainMask; + + return result; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntSetBits( + _In_ PSYMCRYPT_INT piDst, + UINT32 value, + UINT32 iBit, + UINT32 nBits ) +{ + UINT32 mainMask = 0; + + UINT32 alignedVal = 0; + UINT32 alignedMask = 0; + + SYMCRYPT_ASSERT( (nBits > 0) && + (nBits < 33) && + (iBit < piDst->nDigits * SYMCRYPT_FDEF_DIGIT_BITS) && + (iBit + nBits <= piDst->nDigits * SYMCRYPT_FDEF_DIGIT_BITS) ); + + // Zero out the not needed bits of the value + mainMask = (UINT32)(-1) >> (32-nBits); + value &= mainMask; + + // + // Lower word + 
// + + // Create the needed mask + alignedMask = mainMask << (iBit%32); + + // Align the value + alignedVal = value << (iBit%32); + + // Set the lower word first (it exists since iBit is smaller than the max bit) + SYMCRYPT_FDEF_INT_PUINT32(piDst)[iBit/32] = (SYMCRYPT_FDEF_INT_PUINT32(piDst)[iBit/32] & ~alignedMask) | alignedVal; + + // + // Upper word + // + + if ((iBit%32!=0) && ( iBit/32 + 1 < piDst->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 )) + { + // Create the needed mask + alignedMask = mainMask >> (32 - iBit%32); + + // Align the value + alignedVal = value >> (32 - iBit%32); + + // Set the upper word + SYMCRYPT_FDEF_INT_PUINT32(piDst)[iBit/32 + 1] = (SYMCRYPT_FDEF_INT_PUINT32(piDst)[iBit/32 + 1] & ~alignedMask) | alignedVal; + } + +} + + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntMulUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 Src2, + _Out_ PSYMCRYPT_INT piDst ) +{ + UINT32 nWords = piDst->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + + SYMCRYPT_ASSERT( piSrc1->nDigits == piDst->nDigits ); + + UINT64 c = 0; + for( UINT32 i=0; i<nWords; i++ ) + { + c += SYMCRYPT_MUL32x32TO64( SYMCRYPT_FDEF_INT_PUINT32( piSrc1 )[i], Src2 ); + SYMCRYPT_FDEF_INT_PUINT32( piDst )[i] = (UINT32) c; + c >>= 32; + } + + return (UINT32) c; +} + + +VOID +SYMCRYPT_CALL +SymCryptFdefIntMulSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefIntMulMixedSize( piSrc1, piSrc2, piDst, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntSquare( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nS = piSrc->nDigits; + UINT32 nD = piDst->nDigits; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MUL( piDst->nDigits ) ); + + SYMCRYPT_ASSERT( 2*nS <= nD ); + + SymCryptFdefRawSquare( SYMCRYPT_FDEF_INT_PUINT32( piSrc ), nS, 
SYMCRYPT_FDEF_INT_PUINT32( piDst ) ); + + if( 2*nS < nD ) + { + SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piDst )[2 * nS * SYMCRYPT_FDEF_DIGIT_NUINT32], (nD - 2*nS) * SYMCRYPT_FDEF_DIGIT_SIZE ); + } +} + + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMulC( + _In_reads_(nDigits1 * SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits1, + _In_reads_(nDigits2 * SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits2, + _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ) +{ + UINT32 nWords1 = nDigits1 * SYMCRYPT_FDEF_DIGIT_NUINT32; + UINT32 nWords2 = nDigits2 * SYMCRYPT_FDEF_DIGIT_NUINT32; + + // Set Dst to zero + SymCryptWipe( pDst, (nDigits1+nDigits2) * SYMCRYPT_FDEF_DIGIT_SIZE ); + + for( UINT32 i = 0; i < nWords1; i++ ) + { + UINT32 m = pSrc1[i]; + UINT64 c = 0; + for( UINT32 j = 0; j < nWords2; j++ ) + { + // Invariant: c < 2^32 + c += SYMCRYPT_MUL32x32TO64( pSrc2[j], m ); + c += pDst[i+j]; + // There is no overflow on C because the max value is + // (2^32 - 1) * (2^32 - 1) + 2^32 - 1 + 2^32 - 1 = 2^64 - 1. 
+ pDst[i+j] = (UINT32) c; + c >>= 32; + } + pDst[i + nWords2] = (UINT32) c; + } +} + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMul( + _In_reads_(nDigits1*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits1, + _In_reads_(nDigits2*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits2, + _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_MULX ) ) + { + SymCryptFdefRawMulMulx( pSrc1, nDigits1, pSrc2, nDigits2, pDst ); + } else { + SymCryptFdefRawMulAsm( pSrc1, nDigits1, pSrc2, nDigits2, pDst ); + } +#elif SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_ARM + SymCryptFdefRawMulAsm( pSrc1, nDigits1, pSrc2, nDigits2, pDst ); +#else + SymCryptFdefRawMulC( pSrc1, nDigits1, pSrc2, nDigits2, pDst ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquareC( + _In_reads_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ) +{ + UINT32 nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + + UINT32 m = 0; + UINT64 c = 0; + + // Set Dst to zero + SymCryptWipe( pDst, (2*nDigits) * SYMCRYPT_FDEF_DIGIT_SIZE ); + + // First Pass - Addition of the cross products x_i*x_j with i!=j + for( UINT32 i = 0; i < nWords; i++ ) + { + m = pSrc[i]; + c = 0; + for( UINT32 j = i+1; j < nWords; j++ ) + { + // Invariant: c < 2^32 + c += SYMCRYPT_MUL32x32TO64( pSrc[j], m ); + c += pDst[i+j]; + // There is no overflow on C because the max value is + // (2^32 - 1) * (2^32 - 1) + 2^32 - 1 + 2^32 - 1 = 2^64 - 1. 
+ pDst[i+j] = (UINT32) c; + c >>= 32; + } + pDst[i + nWords] = (UINT32) c; + } + + // Second Pass - Shifting all results 1 bit left + c = 0; + for( UINT32 i = 1; i < 2*nWords; i++ ) + { + c |= (((UINT64)pDst[i])<<1); + pDst[i] = (UINT32)c; + c >>= 32; + } + + // Third Pass - Adding the squares on the even columns and propagating the sum + c = 0; + for( UINT32 i = 0; i < nWords; i++ ) + { + // + // Even column + // + m = pSrc[i]; + c += SYMCRYPT_MUL32x32TO64( m, m ); + c += pDst[2*i]; + // There is no overflow on C because the max value is + // (2^32 - 1) * (2^32 - 1) + 2^32 - 1 + 2^32 - 1 = 2^64 - 1 + + pDst[2*i] = (UINT32) c; + c >>= 32; + + // + // Odd column + // + c += pDst[2*i+1]; + // There is no overflow on C because the max value is + // 2^32 - 1 + 2^32 - 1 = 2^33 - 2 + + pDst[2*i+1] = (UINT32) c; + c >>= 32; + } +} + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquare( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_MULX ) ) + { + SymCryptFdefRawSquareMulx( pSrc, nDigits, pDst ); + } else { + SymCryptFdefRawSquareAsm( pSrc, nDigits, pDst ); + } +#elif SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_ARM + SymCryptFdefRawSquareAsm( pSrc, nDigits, pDst ); +#elif SYMCRYPT_CPU_X86 + SymCryptFdefRawMulAsm( pSrc, nDigits, pSrc, nDigits, pDst ); +#else + SymCryptFdefRawSquareC( pSrc, nDigits, pDst ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntMulMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nS1 = piSrc1->nDigits; + UINT32 nS2 = piSrc2->nDigits; + UINT32 nD = piDst ->nDigits; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_MUL( piDst->nDigits ) ); + + SYMCRYPT_ASSERT( nS1 + nS2 <= nD ); + + SymCryptFdefRawMul( 
SYMCRYPT_FDEF_INT_PUINT32( piSrc1 ), nS1, SYMCRYPT_FDEF_INT_PUINT32( piSrc2 ), nS2, SYMCRYPT_FDEF_INT_PUINT32( piDst ) ); + + if( nS1 + nS2 < nD ) + { + SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piDst )[(nS1 + nS2) * SYMCRYPT_FDEF_DIGIT_NUINT32], (nD - (nS1 + nS2)) * SYMCRYPT_FDEF_DIGIT_SIZE ); + } +} + + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptFdefIntFromDivisor( _In_ PSYMCRYPT_DIVISOR pdSrc ) +{ + return &pdSrc->Int; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntToDivisor( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_DIVISOR pdDst, + UINT32 totalOperations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 W; + UINT32 nBits; + UINT32 nWords; + UINT32 bitToTest; + UINT64 P; + + UNREFERENCED_PARAMETER( totalOperations ); + UNREFERENCED_PARAMETER( flags ); + + SYMCRYPT_CHECK_MAGIC( piSrc ); + SYMCRYPT_CHECK_MAGIC( pdDst ); + + SYMCRYPT_ASSERT( piSrc->nDigits == pdDst->nDigits ); + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( piSrc->nDigits ) ); + + // + // Copy the Int. + // + SymCryptFdefIntCopy( piSrc, &pdDst->Int ); + + // + // For an N-bit divisor M, and D-bit divisor digit size, + // the value W is defined as + // floor( (2^{N+D} - 1) / M } - 2^D + // which is the largest W such that (W * M + 2^D * M )< 2^{N+D} + // To compute W we use a binary search. + // This can be optimized, but this is the simplest side-channel safe solution. + // We can compute the upper bits of W * M + 2^D * M in a simple loop. + // + // For now we only compute a 32-bit W for a 32-bit digit divisor size. 
+ // + + nBits = SymCryptIntBitsizeOfValue( &pdDst->Int ); + + SYMCRYPT_ASSERT( nBits != 0 ); + if( nBits == 0 ) + { + // Can't create a divisor from a Int whose value is 0 + + // We really should not have any callers which get here (it is a requirement that Src != 0) + // We assert in CHKed builds + // In release set the divisor to 1 instead + SymCryptIntSetValueUint32( 1, &pdDst->Int ); + } + + pdDst->nBits = nBits; + + nWords = (nBits + 31)/32; + bitToTest = (UINT32)1 << 31; + W = 0; + while( bitToTest > 0 ) + { + W |= bitToTest; + // Do the multiplication + P = 0; + for( UINT32 i=0; i<nWords; i++ ) + { + // Invariant: + // P <= 2^{2D} - 2 which ensures the mul-add doesn't generate an overflow + // P = floor( (W + 2^32)*M[0..i-1] / 2^{32*i} ) + P += SYMCRYPT_MUL32x32TO64( W, SYMCRYPT_FDEF_INT_PUINT32( &pdDst->Int )[i] ); + P >>= 32; + P += SYMCRYPT_FDEF_INT_PUINT32( &pdDst->Int )[i]; + } + // We are interested in bit N+D, and P[0] is bit nWords*D, this shift brings the relevant bit to position 0 + P >>= ((nBits+31) % 32) + 1; + // If the bit is 1, W*M is too large and we reset the corresponding bit in W. + W ^= bitToTest & (0 - ((UINT32)P & 1)); + bitToTest >>= 1; + } + pdDst->td.fdef.W = W; + + SYMCRYPT_SET_MAGIC( pdDst ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawMultSubUint32( + _Inout_updates_( nUint32 + 1 ) PUINT32 pAcc, + _In_reads_( nUint32 ) PCUINT32 pSrc1, + UINT32 Src2, + UINT32 nUint32 ) +{ + // + // pAcc -= pSrc1 * Src2 + // BEWARE: this is only used by the DivMod routine, and works in Words rather than Digits + // making optimizations hard. + // + + UINT32 i; + UINT64 tmul; + UINT64 tsub; + UINT32 c; + + tmul = 0; + c = 0; + for( i=0; i<nUint32; i++ ) + { + tmul += SYMCRYPT_MUL32x32TO64( pSrc1[i], Src2 ); + tsub = (UINT64)pAcc[i] - (UINT32) tmul - c; + pAcc[i] = (UINT32) tsub; + c = (tsub >> 32) & 1; + tmul >>= 32; + } + + // Writing the last word is strictly speaking not necessary, but a really good check that things are going right. 
+ // We can remove the write, but still need the computation of c so it gains very little. + + tsub = (UINT64) pAcc[i] - (UINT32) tmul - c; + pAcc[i] = (UINT32) tsub; + c = (tsub >> 32) & 1; + + return c; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawMaskedAddSubdigit( + _Inout_updates_( nUint32 ) PUINT32 pAcc, + _In_reads_( nUint32 ) PCUINT32 pSrc, + UINT32 mask, + UINT32 nUint32 ) +{ + UINT32 i; + UINT64 t; + + t = 0; + for( i=0; i<nUint32; i++ ) + { + t = t + pAcc[i] + (mask & pSrc[i]); + pAcc[i] = (UINT32) t; + t >>= 32; + } + + return (UINT32) t; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawMaskedAdd( + _Inout_updates_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 ) PUINT32 pAcc, + _In_reads_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 ) PCUINT32 pSrc, + UINT32 mask, + UINT32 nDigits ) +{ + return SymCryptFdefRawMaskedAddSubdigit( pAcc, pSrc, mask, nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawMaskedSub( + _Inout_updates_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 ) PUINT32 pAcc, + _In_reads_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 ) PCUINT32 pSrc, + UINT32 mask, + UINT32 nDigits ) +{ + UINT32 i; + UINT64 t; + UINT32 c; + + c = 0; + for( i=0; i<nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + t = (UINT64) pAcc[i] - (mask & pSrc[i]) - c; + pAcc[i] = (UINT32) t; + c = (UINT32)(t >>= 32) & 1; + } + + return c; +} + + + +VOID +SYMCRYPT_CALL +SymCryptFdefRawDivMod( + _In_reads_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pNum, + UINT32 nDigits, + _In_ PCSYMCRYPT_DIVISOR pdDivisor, + _Out_writes_opt_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pQuotient, + _Out_writes_opt_(SYMCRYPT_OBJ_NUINT32(pdDivisor)) PUINT32 pRemainder, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + UINT32 activeDivWords = (pdDivisor->nBits + 8 * sizeof(UINT32) - 1) / (8 * sizeof( UINT32 ) ); + UINT32 remainderWords = SYMCRYPT_OBJ_NUINT32( pdDivisor ); + + UINT32 cbScratchNeeded = 
(nWords+4) * sizeof( UINT32 ); + PUINT32 pTmp = (PUINT32) pbScratch; + UINT32 Qest; + UINT32 Q; + UINT32 c; + UINT32 d; + UINT32 shift; + UINT32 X0, X1; + UINT32 W; + UINT64 T; + UINT32 nQ; + + SYMCRYPT_ASSERT( cbScratch >= cbScratchNeeded ); + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbScratch ); + + if( nWords < activeDivWords ) + { + // + // input is smaller in size than the significant size of the divisor, no division to do. + // Note that both values in the if() statement are public, so this does not create a side channel. + // + + // Set quotient to zero, and the remainder to the input value + if( pQuotient != NULL ) + { + SymCryptWipe( pQuotient, nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + } + + if( pRemainder != NULL ) + { + SYMCRYPT_ASSERT( remainderWords >= nWords ); + memcpy( pRemainder, pNum, nWords * sizeof( UINT32 ) ); + SymCryptWipe( &pRemainder[nWords], (remainderWords - nWords) * sizeof( UINT32 ) ); // clear the rest of the remainder words + } + + SymCryptFdefClaimScratch( pbScratch, cbScratch, cbScratchNeeded ); + goto cleanup; + } + + // + // We have two zero words in front and two zero words behind the tmp value to allow unrestricted accesses. + // We keep the explicit offset of 2 rather than adjust the pTmp pointer to avoid negative indexes which appear + // to be buffer overflows, and cause trouble with unsigned computations of negative index values that overflow + // to 2^32 - 1 on a 64-bit CPU. 
+ // + pTmp[0] = pTmp[1] = 0; + memcpy( &pTmp[2], pNum, nWords * sizeof( UINT32 ) ); + pTmp[nWords + 2] = pTmp[nWords + 3] = 0; + shift = (0 - pdDivisor->nBits) & 31; // # bits we have to shift top words to the left to align with the W value + + // We generate the quotient words one at a time, starting at the most significant position + // The top (divWords - 1) words are always zero + + if( pQuotient != NULL ) + { + SymCryptWipe( &pQuotient[nWords - activeDivWords + 1], (activeDivWords - 1) * sizeof( UINT32 ) ); + } + + nQ = nWords - activeDivWords + 1; + + // There is always at least one word of Q to be computed, so we can use a do-while loop which + // also avoids the UINT32 underflow. + do + { + nQ--; + X0 = ( ((UINT64) pTmp[nQ + activeDivWords + 2] << 32) + pTmp[nQ + activeDivWords + 1] ) >> (32 - shift); + X1 = ( ((UINT64) pTmp[nQ + activeDivWords + 1] << 32) + pTmp[nQ + activeDivWords + 0] ) >> (32 - shift); + + W = (UINT32) pdDivisor->td.fdef.W; + T = SYMCRYPT_MUL32x32TO64( W, X0 ) + (((UINT64)X0) << 32) + X1 + ((W>>1) & ((UINT32)0 - (X1 >> 31))); + Qest = (UINT32)(T >> 32); + // At this point the estimator is correct or one too small, add one but don't overflow + Qest += 1; + Qest += SYMCRYPT_MASK32_ZERO( Qest ); + + c = SymCryptFdefRawMultSubUint32( &pTmp[nQ+2], SYMCRYPT_FDEF_INT_PUINT32( &pdDivisor->Int ), Qest, activeDivWords ); + Q = Qest - c; + d = SymCryptFdefRawMaskedAddSubdigit( &pTmp[nQ+2], SYMCRYPT_FDEF_INT_PUINT32( &pdDivisor->Int ), (0-c), activeDivWords ); + SYMCRYPT_ASSERT( c == d ); + SYMCRYPT_ASSERT( pTmp[nQ + activeDivWords+2] == (0 - c) ); + + if( pQuotient != NULL ) + { + pQuotient[nQ] = Q; + } + } while( nQ > 0 ); + + if( pRemainder != NULL ) + { + memcpy( pRemainder, pTmp+2, activeDivWords * sizeof( UINT32 ) ); + SymCryptWipe( &pRemainder[activeDivWords], (remainderWords - activeDivWords) * sizeof( UINT32 ) ); + } + +cleanup: + return; // label needs a statement to follow it... 
+} + + +VOID +SYMCRYPT_CALL +SymCryptFdefIntDivMod( + _In_ PCSYMCRYPT_INT piSrc, + _In_ PCSYMCRYPT_DIVISOR pdDivisor, + _Out_opt_ PSYMCRYPT_INT piQuotient, + _Out_opt_ PSYMCRYPT_INT piRemainder, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = SYMCRYPT_OBJ_NDIGITS( piSrc ); + + SYMCRYPT_ASSERT( piQuotient == NULL || piQuotient->nDigits >= piSrc->nDigits ); + SYMCRYPT_ASSERT( piRemainder == NULL || piRemainder->nDigits >= pdDivisor->nDigits ); + + SymCryptFdefRawDivMod( + SYMCRYPT_FDEF_INT_PUINT32( piSrc ), + nDigits, + pdDivisor, + piQuotient == NULL ? NULL : SYMCRYPT_FDEF_INT_PUINT32( piQuotient ), + piRemainder == NULL ? NULL : SYMCRYPT_FDEF_INT_PUINT32( piRemainder ), + pbScratch, + cbScratch + ); + + if ((piQuotient != NULL) && (piQuotient->nDigits > piSrc->nDigits)) + { + SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piQuotient )[piSrc->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32], (piQuotient->nDigits - piSrc->nDigits) * SYMCRYPT_FDEF_DIGIT_SIZE ); + } + + if ((piRemainder != NULL) && (piRemainder->nDigits > pdDivisor->nDigits)) + { + SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piRemainder )[pdDivisor->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32], (piRemainder->nDigits - pdDivisor->nDigits) * SYMCRYPT_FDEF_DIGIT_SIZE ); + } +} + +/* Wine hack: asm not supported yet */ + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawAddAsm( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 Dst, + UINT32 nDigits ) +{ + return SymCryptFdefRawAddC( Src1, Src2, Dst, nDigits ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawSubAsm( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ) +{ + return SymCryptFdefRawSubC( pSrc1, pSrc2, 
pDst, nDigits ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMulAsm( + _In_reads_(nDigits1*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits1, + _In_reads_(nDigits2*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits2, + _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ) +{ + SymCryptFdefRawMulC( pSrc1, nDigits1, pSrc2, nDigits2, pDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquareAsm( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ) +{ + SymCryptFdefRawSquareC( pSrc, nDigits, pDst ); +} + +/* Wine hack: asm not supported yet */ + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquareMulx( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ) +{ + SymCryptFdefRawSquareC( pSrc, nDigits, pDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMulMulx( + _In_reads_(nDigits1*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits1, + _In_reads_(nDigits2*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits2, + _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ) +{ + SymCryptFdefRawMulC( pSrc1, nDigits1, pSrc2, nDigits2, pDst ); +} diff --git a/libs/symcrypt/lib/fdef_mod.c b/libs/symcrypt/lib/fdef_mod.c new file mode 100644 index 00000000000..3ab4c2ba438 --- /dev/null +++ b/libs/symcrypt/lib/fdef_mod.c @@ -0,0 +1,1731 @@ +// +// fdef_int.c INT functions for default number format +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +PSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptFdefModulusAllocate( UINT32 nDigits ) +{ + PVOID p = NULL; + UINT32 cb; + PSYMCRYPT_MODULUS res = NULL; + + // + // The nDigits requirements are enforced by SymCryptFdefSizeofModulusFromDigits. Thus + // the result does not overflow and is upper bounded by 2^19. 
+ // + cb = SymCryptFdefSizeofModulusFromDigits( nDigits ); + + if( cb != 0 ) + { + p = SymCryptCallbackAlloc( cb ); + } + + if( p == NULL ) + { + goto cleanup; + } + + res = SymCryptFdefModulusCreate( p, cb, nDigits ); + +cleanup: + return res; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusFree( _Out_ PSYMCRYPT_MODULUS pmObj ) +{ + SymCryptModulusWipe( pmObj ); + SymCryptCallbackFree( pmObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefSizeofModulusFromDigits( UINT32 nDigits ) +{ + SYMCRYPT_ASSERT( nDigits != 0 ); + SYMCRYPT_ASSERT( nDigits <= SYMCRYPT_FDEF_UPB_DIGITS ); + + // Ensure we do not overflow the following calculation when provided with invalid inputs + if( nDigits == 0 || nDigits > SYMCRYPT_FDEF_UPB_DIGITS ) + { + return 0; + } + + // Room for the Modulus structure, the Divisor, the negated divisor, and the R^2 Montgomery factor + // + return SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, Divisor ) + SymCryptFdefSizeofDivisorFromDigits( nDigits ) + (2 * nDigits * SYMCRYPT_FDEF_DIGIT_SIZE); +} + +PSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptFdefModulusCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ) +{ + PSYMCRYPT_MODULUS pmMod = NULL; + UINT32 cb = SymCryptFdefSizeofModulusFromDigits( nDigits ); + + const UINT32 offset = SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MODULUS, Divisor ); + + SYMCRYPT_ASSERT( cb >= sizeof(SYMCRYPT_MODULUS) ); + SYMCRYPT_ASSERT( cbBuffer >= cb ); + if( (cb == 0) || (cbBuffer < cb) ) + { + goto cleanup; // return NULL + } + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + pmMod = (PSYMCRYPT_MODULUS) pbBuffer; + + pmMod->type = 'gM' << 16; + pmMod->nDigits = nDigits; + + // + // The nDigits requirements are enforced by SymCryptFdefSizeofModulusFromDigits. Thus + // the result does not overflow and is upper bounded by 2^19. 
+ // + pmMod->cbSize = cb; + pmMod->flags = 0; + + // The following is bounded by 2^17 + pmMod->cbModElement = nDigits * SYMCRYPT_FDEF_DIGIT_SIZE; + + SymCryptFdefDivisorCreate( pbBuffer + offset, cbBuffer - offset, nDigits ); + + // We don't have a modulus value yet, so we don't create/initialize any implementation-specific things. + + SYMCRYPT_SET_MAGIC( pmMod ); + +cleanup: + return pmMod; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusInitGeneric( + _Inout_ PSYMCRYPT_MODULUS pmMod, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UNREFERENCED_PARAMETER( pmMod ); + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); +} + + +VOID +SymCryptFdefModulusCopy( + _In_ PCSYMCRYPT_MODULUS pmSrc, + _Out_ PSYMCRYPT_MODULUS pmDst ) +{ + SYMCRYPT_ASSERT( pmSrc->nDigits == pmDst->nDigits ); + + if( pmSrc != pmDst ) + { + memcpy( pmDst, pmSrc, pmDst->cbSize ); + + SymCryptFdefDivisorCopyFixup( &pmSrc->Divisor, &pmDst->Divisor ); + + // Copy the type-specific fields + SYMCRYPT_MOD_CALL( pmSrc ) modulusCopyFixup( pmSrc, pmDst ); + + SYMCRYPT_SET_MAGIC( pmDst ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusCopyFixupGeneric( + _In_ PCSYMCRYPT_MODULUS pmSrc, + _Out_ PSYMCRYPT_MODULUS pmDst ) +{ + // Only have to handle the type-specific fields, which we don't have any of. + UNREFERENCED_PARAMETER( pmSrc ); + UNREFERENCED_PARAMETER( pmDst ); +} + + +PSYMCRYPT_MODELEMENT +SYMCRYPT_CALL +SymCryptFdefModElementAllocate( _In_ PCSYMCRYPT_MODULUS pmMod ) +{ + PVOID p; + UINT32 cb; + PSYMCRYPT_MODELEMENT res = NULL; + + // + // The nDigits requirements are enforced by the modulus object. Thus + // the result does not overflow and is upper bounded by 2^17. 
+ // + cb = SymCryptFdefSizeofModElementFromModulus( pmMod ); + + p = SymCryptCallbackAlloc( cb ); + + if( p == NULL ) + { + goto cleanup; + } + + res = SymCryptFdefModElementCreate( p, cb, pmMod ); + +cleanup: + return res; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModElementFree( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peObj ) +{ + SymCryptFdefModElementWipe( pmMod, peObj ); + SymCryptCallbackFree( peObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefSizeofModElementFromModulus( PCSYMCRYPT_MODULUS pmMod ) +{ + // Upper bounded by 2^17 since the modulus is up to SYMCRYPT_INT_MAXBITS = 2^20 bits. + return pmMod->cbModElement; +} + +PSYMCRYPT_MODELEMENT +SYMCRYPT_CALL +SymCryptFdefModElementCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + PCSYMCRYPT_MODULUS pmMod ) +{ + PSYMCRYPT_MODELEMENT pDst = (PSYMCRYPT_MODELEMENT) pbBuffer; + + UNREFERENCED_PARAMETER( pmMod ); + UNREFERENCED_PARAMETER( cbBuffer ); + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + SYMCRYPT_ASSERT( cbBuffer >= SymCryptFdefSizeofModElementFromModulus( pmMod ) ); + SYMCRYPT_ASSERT( cbBuffer >= pmMod->nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ); + + // + // We have various optimizations where we use only part of the last digit + // Simple and fast solution: always wipe the last digit + // +#if (SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64) + UINT32 nDigits = pmMod->nDigits; + + SymCryptWipeKnownSize( pbBuffer + (nDigits-1) * SYMCRYPT_FDEF_DIGIT_SIZE, SYMCRYPT_FDEF_DIGIT_SIZE ); +#endif + + // There is nothing to initialize... 
+ + return pDst; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModElementWipe( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst ) +{ + SymCryptWipe( peDst, pmMod->cbModElement ); +} + +VOID +SymCryptFdefModElementCopy( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst ) +{ + if( peSrc != peDst ) + { + memcpy( peDst, peSrc, pmMod->cbModElement ); + } +} + +VOID +SymCryptFdefModElementMaskedCopy( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 mask ) +{ + SymCryptFdefMaskedCopy( (PCBYTE) peSrc, (PBYTE) peDst, pmMod->nDigits, mask ); +} + + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptFdefDivisorFromModulus( _In_ PSYMCRYPT_MODULUS pmSrc ) +{ + return &pmSrc->Divisor; +} + +VOID +SymCryptFdefModElementConditionalSwap( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peData1, + _Inout_ PSYMCRYPT_MODELEMENT peData2, + _In_ UINT32 cond ) +{ + SymCryptFdefConditionalSwap( (PBYTE) &peData1->d.uint32[0], (PBYTE) &peData2->d.uint32[0], pmMod->nDigits, cond ); +} + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptFdefIntFromModulus( _In_ PSYMCRYPT_MODULUS pmSrc ) +{ + + return SymCryptFdefIntFromDivisor( &pmSrc->Divisor ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefDecideModulusType( PCSYMCRYPT_INT piSrc, UINT32 nDigits, UINT32 averageOperations, UINT32 flags ) +{ + UINT32 res = 0; + BOOLEAN disableMontgomery = 0; + BYTE tempBuf[64]; + PCSYMCRYPT_MODULUS_TYPE_SELECTION_ENTRY pEntry; + + UINT32 nBitsizeOfValue = SymCryptIntBitsizeOfValue( piSrc ); + UINT32 modulusFeatures = 0; + + if( !disableMontgomery && + ( flags & (SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PARITY_PUBLIC)) != 0 && + (SymCryptIntGetValueLsbits32( piSrc ) & 1) == 1 && + averageOperations >= 10 ) + { + modulusFeatures |= SYMCRYPT_MODULUS_FEATURE_MONTGOMERY; + + // Specific modulus value detection + if( (flags & SYMCRYPT_FLAG_DATA_PUBLIC) != 0 ) + { + // Detect if modulus 
value is the P384 field modulus (convert piSrc to big endian and do comparison with known value of P384 modulus) + if( nBitsizeOfValue == 384 && + SymCryptFdefRawGetValue(SYMCRYPT_FDEF_INT_PUINT32(piSrc), SYMCRYPT_FDEF_DIGITS_FROM_BITS(384), tempBuf, 64, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST) == SYMCRYPT_NO_ERROR ) + { + // First 16 bytes are guaranteed to be zero because nBitsizeOfValue is 384 + if( memcmp(tempBuf+16, ((PBYTE)SymCryptEcurveParamsNistP384) + sizeof(SYMCRYPT_ECURVE_PARAMS), 48) == 0 ) + { + modulusFeatures |= SYMCRYPT_MODULUS_FEATURE_NISTP384; + } + } + + // Detect if modulus value is the P256 field modulus (not currently used) + // if( nBitsizeOfValue == 256 && + // SymCryptFdefRawGetValue(SYMCRYPT_FDEF_INT_PUINT32(piSrc), SYMCRYPT_FDEF_DIGITS_FROM_BITS(256), tempBuf, 64, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST) == SYMCRYPT_NO_ERROR ) + // { + // // First 32 bytes are guaranteed to be zero because nBitsizeOfValue is 256 + // if( memcmp(tempBuf+32, ((PBYTE)SymCryptEcurveParamsNistP256) + sizeof(SYMCRYPT_ECURVE_PARAMS), 32) == 0 ) + // { + // modulusFeatures |= SYMCRYPT_MODULUS_FEATURE_NISTP256; + // } + // } + } + } + + pEntry = SymCryptModulusTypeSelections; + + for(;;) + { + if( SYMCRYPT_CPU_FEATURES_PRESENT( pEntry->cpuFeatures ) && + (pEntry->maxBits == 0 || (nDigits <= SymCryptDigitsFromBits( pEntry->maxBits ) && nBitsizeOfValue <= pEntry->maxBits )) && + (pEntry->modulusFeatures & ~modulusFeatures) == 0 + ) + { + res = pEntry->type; + break; + } + pEntry++; + } + + return res; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetPostGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UNREFERENCED_PARAMETER( pmMod ); + UNREFERENCED_PARAMETER( peObj ); + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); +} + +PCUINT32 +SYMCRYPT_CALL +SymCryptFdefModPreGetGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peObj, + 
_Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UNREFERENCED_PARAMETER( pmMod ); + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); + + return &peObj->d.uint32[0]; +} + + + +VOID +SYMCRYPT_CALL +SymCryptFdefIntToModulus( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_MODULUS pmDst, + UINT32 averageOperations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + pmDst->flags = flags; + SymCryptIntToDivisor( piSrc, &pmDst->Divisor, averageOperations, flags & SYMCRYPT_FLAG_DATA_PUBLIC, pbScratch, cbScratch ); + + pmDst->type = SymCryptFdefDecideModulusType( piSrc, pmDst->nDigits, averageOperations, flags ); + + // Set inv64 - note the value is only valid if the modulus is odd, but the computation + // is constant time regardless of the parity, so we can safely compute it in all cases + pmDst->inv64 = 0 - SymCryptInverseMod2e64( SymCryptIntGetValueLsbits64(piSrc) ); + + SYMCRYPT_MOD_CALL( pmDst ) modulusInit( pmDst, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefIntToModElement( + _In_ PCSYMCRYPT_INT piSrc, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefRawDivMod( + SYMCRYPT_FDEF_INT_PUINT32( piSrc ), + piSrc->nDigits, + &pmMod->Divisor, + NULL, // throw away the quotient + &peDst->d.uint32[0], + pbScratch, + cbScratch ); + + SYMCRYPT_MOD_CALL( pmMod ) modSetPost( pmMod, peDst, pbScratch, cbScratch ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModElementToIntGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_reads_bytes_( pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) + PCUINT32 pSrc, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + memcpy( SYMCRYPT_FDEF_INT_PUINT32( piDst ), pSrc, pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + + SymCryptWipe( &SYMCRYPT_FDEF_INT_PUINT32( piDst )[pmMod->nDigits * 
SYMCRYPT_FDEF_DIGIT_NUINT32], (piDst->nDigits - pmMod->nDigits) * SYMCRYPT_FDEF_DIGIT_SIZE ); + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pmMod->nDigits ) ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModElementSetValueGeneric( + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError; + UINT32 nDigits = pmMod->nDigits; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + + SYMCRYPT_ASSERT( cbSrc <= nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + + scError = SymCryptFdefRawSetValue( pbSrc, cbSrc, format, &peDst->d.uint32[0], nDigits ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptFdefRawDivMod( + &peDst->d.uint32[0], + nDigits, + &pmMod->Divisor, + NULL, + &peDst->d.uint32[0], + pbScratch, + cbScratch ); + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModElementGetValue( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError; + PCUINT32 pUint32; + UINT32 nDigits = pmMod->nDigits; + + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + + SYMCRYPT_ASSERT( cbDst <= nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + + pUint32 = SYMCRYPT_MOD_CALL( pmMod ) modPreGet( pmMod, peSrc, pbScratch, cbScratch ); + + scError = SymCryptFdefRawGetValue( pUint32, nDigits, pbDst, cbDst, format ); + + return scError; +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefModElementIsEqual( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ 
PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2 ) +{ + UINT32 d; + UINT32 i; + + d = 0; + for( i=0; i < pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 ; i++ ) + { + d |= peSrc1->d.uint32[i] ^ peSrc2->d.uint32[i]; + } + + return SYMCRYPT_MASK32_ZERO( d ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptFdefModElementIsZero( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc ) +{ + UINT32 d; + UINT32 i; + + d = 0; + for( i=0; i < pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 ; i++ ) + { + d |= peSrc->d.uint32[i]; // Check that all bits are zero + } + + return SYMCRYPT_MASK32_ZERO( d ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModAddGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 c; + UINT32 d; + UINT32 nDigits = pmMod->nDigits; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + SYMCRYPT_ASSERT( cbScratch >= nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ); + + // + // Doing add/cmp/sub might be faster or not. + // Masked add is hard because the mask operations destroy the carry flag. + // + + // dcl - cleanup? + +// c = SymCryptFdefRawAdd( &pSrc1->uint32[0], &pSrc2->uint32[0], &pDst->uint32[0], nDigits); +// d = SymCryptFdefRawSub( &pDst->uint32[0], &pMod->Divisor.Int.uint32[0], &pDst->uint32[0], nDigits ); +// e = SymCryptFdefRawMaskedAdd( &pDst->uint32[0], &pMod->Divisor.Int.uint32[0], 0 - (c^d), nDigits ); + + c = SymCryptFdefRawAdd( &peSrc1->d.uint32[0], &peSrc2->d.uint32[0], &peDst->d.uint32[0], nDigits ); + d = SymCryptFdefRawSub( &peDst->d.uint32[0], SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int ), (PUINT32) pbScratch, nDigits ); + SymCryptFdefMaskedCopy( pbScratch, (PBYTE) &peDst->d.uint32[0], nDigits, (c^d) - 1 ); + + // We can't have a carry in the first addition, and no carry in the subtraction. 
+ SYMCRYPT_ASSERT( !( c == 1 && d == 0 ) ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModSubGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 c; + UINT32 d; + UINT32 nDigits = pmMod->nDigits; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + SYMCRYPT_ASSERT( cbScratch >= nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ); + + c = SymCryptFdefRawSub( &peSrc1->d.uint32[0], &peSrc2->d.uint32[0], &peDst->d.uint32[0], nDigits ); + d = SymCryptFdefRawAdd( &peDst->d.uint32[0], SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int ), (PUINT32) pbScratch, nDigits ); + SymCryptFdefMaskedCopy( pbScratch, (PBYTE) &peDst->d.uint32[0], nDigits, 0 - c ); + + SYMCRYPT_ASSERT( !(c == 1 && d == 0) ); +} + + +VOID +SYMCRYPT_CALL +SymCryptFdefModNegGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + UINT32 isZero; + UINT32 i; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + + // + // We have to be careful to handle the value 0 properly as it does NOT map to Modulus - Value. 
+ // + isZero = SymCryptFdefRawIsEqualUint32( &peSrc->d.uint32[0], nDigits , 0 ); + SymCryptFdefRawSub( SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int ), &peSrc->d.uint32[0], &peDst->d.uint32[0], nDigits ); + + // Now we set the result to zero if the input was zero + for( i=0; i< nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + peDst->d.uint32[i] &= ~isZero; + } +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModElementSetValueUint32Generic( + UINT32 value, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + + if( pmMod->Divisor.nBits <= 32 && value >= SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int )[0] ) + { + // The value is >= the modulus; this is not supported + + // For now do a possibly non-sidechannel safe, but mathematically correct modulo operation + value %= SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int )[0]; + } + + peDst->d.uint32[0] = value; + + SymCryptWipe( &peDst->d.uint32[1], nDigits * SYMCRYPT_FDEF_DIGIT_SIZE - sizeof( UINT32 ) ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModElementSetValueNegUint32( + UINT32 value, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + + if( pmMod->Divisor.nBits <= 32 && value >= SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int )[0] ) + { + // The value is >= the modulus; this is not supported. 
+ + // For now do a possibly non-sidechannel safe, but mathematically correct modulo operation + value %= SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int )[0]; + } + + if( value == 0 ) + { + SymCryptWipe( &peDst->d.uint32[0], nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + } else { + SymCryptFdefRawSubUint32( SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int ), value, &peDst->d.uint32[0], nDigits ); + } + + // + // Possible future optimization: we can optimize the value==0 and value==1 cases on a per-type basis + // + SYMCRYPT_MOD_CALL( pmMod ) modSetPost( pmMod, peDst, pbScratch, cbScratch ); +} + +// In the worst case there is a 1 in 8 chance of successfully generating a value +// This is when the modulus is 4 (nBits of modulus is 3), and 0, 1, and -1 are disallowed. +// In this case, having 1000 retries, there is a ~ 2^-193 chance of failure unless SymCryptCallbackRandom +// is completely broken. This passes the bar of being reasonable to Fatal. +#define FDEF_MOD_SET_RANDOM_GENERIC_LIMIT (1000) + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetRandomGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 offset; + UINT32 ulimit; + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + UINT32 nUsedBytes; + UINT32 mask; + UINT32 c; + UINT32 cntr; + PUINT32 pDst = &peDst->d.uint32[0]; + PCUINT32 pMod = SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int ); + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + + if( (flags & SYMCRYPT_FLAG_MODRANDOM_ALLOW_ZERO) != 0 ) + { + // SYMCRYPT_FLAG_MODRANDOM_ALLOW_ZERO => SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE + offset = 0; + } else if( (flags & SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE) != 0 ) + { + offset = 1; + } else + { + offset = 2; + } + + if( (flags & SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE ) ) + { + ulimit = 0; + } else { + ulimit = 1; + } + + // + // 
Special case for small divisors: + // When the divisor is 1, 2, or 3 we always allow returning -1 + // We may also allow returning 1 or 0 depending on the flags specified + if ( pmMod->Divisor.nBits < 3 ) + { + // At a minimum, allow -1 + offset = SYMCRYPT_MIN(offset, pMod[0] - 1); + ulimit = 0; + } + + // Set pTmp to pMod-(offset+ulimit) + SYMCRYPT_ASSERT( nDigits * SYMCRYPT_FDEF_DIGIT_SIZE <= cbScratch ); + c = SymCryptFdefRawSubUint32( pMod, offset + ulimit, pTmp, nDigits ); + SYMCRYPT_ASSERT( c == 0 ); + + nUsedBytes = (pmMod->Divisor.nBits + 7)/8; + mask = 0x100 >> ( (8-pmMod->Divisor.nBits) & 7); + mask -= 1; + + // Wipe any bytes we won't fill with random + SymCryptWipe( (PBYTE)pDst + nUsedBytes, (nDigits * SYMCRYPT_FDEF_DIGIT_SIZE) - nUsedBytes ); + + for(cntr=0; cntr<FDEF_MOD_SET_RANDOM_GENERIC_LIMIT; cntr++) + { + // Try random values until we get one we like + SymCryptCallbackRandom( (PBYTE)pDst, nUsedBytes ); + ((PBYTE)pDst)[nUsedBytes-1] &= (BYTE) mask; + + // Compare value to pMod-(offset+ulimit) + if( SymCryptFdefRawIsLessThan( pDst, pTmp, nDigits ) ) + { + // The value is within required range [0, Divisor-offset-ulimit) + break; + } + } + + // Wipe all the digits in pTmp + SymCryptWipe( pTmp, nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + + if (cntr >= FDEF_MOD_SET_RANDOM_GENERIC_LIMIT) + { + SymCryptFatal( 'rndc'); + } + + // Add the offset which allows us to avoid 0 and/or 1 if required. 
+ // Now result is in range [offset, Divisor-ulimit) + c = SymCryptFdefRawAddUint32( pDst, offset, pDst, nDigits ); + SYMCRYPT_ASSERT( c == 0 ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModDivSmallPow2Generic( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _In_range_(1, NATIVE_BITS) UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst) +{ + UINT32 nDigits = pmMod->nDigits; + UINT32 mask; + UINT64 t; + UINT64 u; + UINT32 i; + PCUINT32 pMod = SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int ); + + // mod must be odd + SYMCRYPT_ASSERT( (pMod[0] & 1) != 0 ); + SYMCRYPT_ASSERT( (exp >= 1) && (exp <= NATIVE_BITS) ); + + do + { + mask = (UINT32)0 - (peSrc->d.uint32[0] & 1); + + t = (UINT64) peSrc->d.uint32[0] + (pMod[0] & mask); + u = (UINT32) t; + t >>= 32; + + for( i = 1; i < nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; i++ ) + { + t += pMod[i] & mask; + t += peSrc->d.uint32[i]; + + u |= t << 32; + + peDst->d.uint32[i-1] = (UINT32)(u >> 1); + t >>= 32; + u >>= 32; + } + u |= t << 32; + peDst->d.uint32[i-1] = (UINT32)( u >> 1 ); + + exp -= 1; + + // First iteration reads from peSrc and writes to peDst + // subsequent iterations must read from and write to peDst + peSrc = peDst; + } while (exp > 0); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModDivSmallPow2( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _In_range_(1, NATIVE_BITS) UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst ) +{ + +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_MULX ) ) + { + SymCryptFdefModDivSmallPow2Mulx( pmMod, peSrc, exp, peDst ); + } + else + { + // Currently SymCryptAsm does not support AMD64 functions with shl/shr/shrd + // by a variable count, as this needs special handling of the rcx (cl) register + // For now we just fallback to the generic implementation on machines without MULX + SymCryptFdefModDivSmallPow2Generic( pmMod, peSrc, exp, peDst ); + } +#elif SYMCRYPT_CPU_ARM64 + SymCryptFdefModDivSmallPow2Asm( pmMod, peSrc, exp, 
peDst ); +#else + SymCryptFdefModDivSmallPow2Generic( pmMod, peSrc, exp, peDst ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModDivPow2( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 shiftAmount; + + UNREFERENCED_PARAMETER(pbScratch); + UNREFERENCED_PARAMETER(cbScratch); + + // mod must be odd + SYMCRYPT_ASSERT( (SYMCRYPT_FDEF_INT_PUINT32(&pmMod->Divisor.Int)[0] & 1) != 0 ); + + if( exp == 0 ) + { + // If exp is 0 we just need to copy peSrc to peDst + SymCryptFdefModElementCopy( pmMod, peSrc, peDst ); + return; + } + + do + { + shiftAmount = SYMCRYPT_MIN(NATIVE_BITS, exp); + SymCryptFdefModDivSmallPow2( pmMod, peSrc, shiftAmount, peDst ); + exp -= shiftAmount; + + // First iteration reads from peSrc and writes to peDst + // subsequent iterations must read from and write to peDst + peSrc = peDst; + } while( exp > 0 ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + UINT32 scratchOffset = 2 * nDigits * SYMCRYPT_FDEF_DIGIT_SIZE; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + SYMCRYPT_ASSERT( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) >= scratchOffset + SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_DIVMOD( 2 * nDigits, nDigits ) ); + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbScratch ); + + // Tmp space is enough for the product plus the DivMod scratch + + SymCryptFdefRawMul( &peSrc1->d.uint32[0], nDigits, &peSrc2->d.uint32[0], nDigits, pTmp ); + + 
SymCryptFdefRawDivMod( pTmp, 2*nDigits, &pmMod->Divisor, NULL, &peDst->d.uint32[0], pbScratch + scratchOffset, cbScratch - scratchOffset ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + UINT32 scratchOffset = 2 * nDigits * SYMCRYPT_FDEF_DIGIT_SIZE; + + SymCryptFdefClaimScratch( pbScratch, cbScratch, SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + SYMCRYPT_ASSERT( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) >= scratchOffset + SYMCRYPT_FDEF_SCRATCH_BYTES_FOR_INT_DIVMOD( 2 * nDigits, nDigits ) ); + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbScratch ); + + // Tmp space is enough for the product plus the DivMod scratch + + SymCryptFdefRawSquare( &peSrc->d.uint32[0], nDigits, pTmp ); + + SymCryptFdefRawDivMod( pTmp, 2*nDigits, &pmMod->Divisor, NULL, &peDst->d.uint32[0], pbScratch + scratchOffset, cbScratch - scratchOffset ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModInvGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 nDigits = pmMod->nDigits; + UINT32 nBytes; + UINT32 c; + UINT32 leastSignificantUint32; + UINT32 trailingZeros; + + // + // This function is called on Montgomery moduli so we can't directly call specifically optimized modular operations from here. + // + // For now we use dispatch functions with pmMod to perform potentially optimized modular operations. 
+ // This approach makes sense when on average the cost of dispatch is less than the benefit using an optimized operation. + // The alternative is to make specialized ModInv routines for different types of moduli, but we do not yet do this to + // reduce code duplication / code size. + // + + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_MODINV( nDigits ) ); + + if( (pmMod->flags & (SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME )) != (SYMCRYPT_FLAG_DATA_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME ) ) + { + // Inversion over non-public or non-prime moduli currently not supported. + // Our blinding below only works for prime moduli. + // As the modulus cannot be blinded, it requires a fully side-channel safe algorithm which is much more complicated and + // slower. + // When this is necessary, we will add a second ModInv implementation for those cases. + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // + // Algorithm: + // R = random nonzero value mod Mod + // X := Src * R (mod Mod) + // A = X + // B = Mod + // Va = 1 + // Vb = 0 + // invariant: A = Va*X (mod Mod), B = Vb*X (mod Mod), + // + // if( A == 0 ): error + // + // verify (A | B) is odd + // if B even: swap (A,B), swap( Va, Vb) + // + // repeat: + // while( A even ): + // A /= 2; Va /= 2 (mod Mod) + // if( A == 1 ): break1 + // (A, Va, B, Vb) = (B-A, Vb - Va, A, Va) + // if( A == 0 ): error (not co-prime) + + nBytes = SymCryptSizeofModElementFromModulus( pmMod ); + + SYMCRYPT_ASSERT( cbScratch >= 4*nBytes ); + PSYMCRYPT_MODELEMENT peR = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + PSYMCRYPT_MODELEMENT peX = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + PSYMCRYPT_MODELEMENT peVa = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + PSYMCRYPT_MODELEMENT peVb = SymCryptModElementCreate( pbScratch, nBytes, pmMod ); + pbScratch += nBytes; + cbScratch -= 4*nBytes; + + PSYMCRYPT_MODELEMENT 
peVtmpPtr; + + nBytes = SymCryptSizeofIntFromDigits( nDigits ); + SYMCRYPT_ASSERT( cbScratch >= 3 * nBytes ); + PSYMCRYPT_INT piA = SymCryptIntCreate( pbScratch, nBytes, nDigits ); + pbScratch += nBytes; + PSYMCRYPT_INT piB = SymCryptIntCreate( pbScratch, nBytes, nDigits ); + pbScratch += nBytes; + PSYMCRYPT_INT piT = SymCryptIntCreate( pbScratch, nBytes, nDigits ); + pbScratch += nBytes; + cbScratch -= 3*nBytes; + + PSYMCRYPT_INT piTmpPtr; + + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigits ) ); + + // If the data is not public, multiply by a random blinding factor; otherwise copy the value + if( (flags & SYMCRYPT_FLAG_DATA_PUBLIC) == 0 ) + { + SymCryptModSetRandom( pmMod, peR, SYMCRYPT_FLAG_MODRANDOM_ALLOW_ONE | SYMCRYPT_FLAG_MODRANDOM_ALLOW_MINUSONE, pbScratch, cbScratch ); //R = random + SymCryptModMul( pmMod, peR, peSrc, peX, pbScratch, cbScratch ); // X = R * Src + } else + { + SymCryptModElementCopy( pmMod, peSrc, peX ); + } + + // Set up piA and piB + SymCryptFdefModElementToIntGeneric( pmMod, &peX->d.uint32[0], piA, pbScratch, cbScratch ); // A = X + SymCryptIntCopy( SymCryptIntFromModulus( (PSYMCRYPT_MODULUS) pmMod ), piB ); // B = Mod + + // Reject if A = 0, B = 0, or A and B both even + if( SymCryptIntIsEqualUint32( piA, 0 ) | + SymCryptIntIsEqualUint32( piB, 0 ) | + (((SymCryptIntGetValueLsbits32( piA ) | SymCryptIntGetValueLsbits32( piB )) & 1) ^ 1) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if( SymCryptIntIsEqualUint32( piB, 2 ) ) + { + // Mod = 2 is a valid input. Luckily, modular inversion is easy. + // The rest of the code assumes that Mod is odd. Other even values are not prime. 
+ SymCryptModElementCopy( pmMod, peSrc, peDst); + goto cleanup; + } + + SymCryptFdefModElementSetValueUint32Generic( 1, pmMod, peVa, pbScratch, cbScratch ); // Va = 1 + SymCryptFdefModElementSetValueUint32Generic( 0, pmMod, peVb, pbScratch, cbScratch ); // Vb = 0 + + for(;;) + { + // invariant: A = Va*X (mod Mod), B = Vb*X (mod Mod), A != 0, B > 1. + // Remove factors of 2 from A. This loop terminates because A != 0 + leastSignificantUint32 = SymCryptIntGetValueLsbits32(piA); + while( (leastSignificantUint32 & 1) == 0 ) + { + trailingZeros = SymCryptCountTrailingZeros32( leastSignificantUint32 ); + SymCryptIntDivPow2( piA, trailingZeros, piA ); + SymCryptFdefModDivSmallPow2( pmMod, peVa, trailingZeros, peVa ); + leastSignificantUint32 = SymCryptIntGetValueLsbits32(piA); + } + + if( SymCryptIntIsEqualUint32( piA, 1 ) ) + { + // A = 1 = Va * X (mod Mod), so Va is the inverse of X + break; + } + + c = SymCryptIntSubSameSize( piB, piA, piT ); + + // If A != 1 and A=B, then A is the GCD of the original inputs, and there is no inverse + if( SymCryptIntIsEqualUint32( piT, 0 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if( c == 0 ) + { + // B > A, we set B to B-A and swap (B,A) + // that way we continue our halving on B-A + + SymCryptIntCopy( piT, piB ); + SymCryptModSub( pmMod, peVb, peVa, peVb, pbScratch, cbScratch ); + + piTmpPtr = piB; piB = piA; piA = piTmpPtr; + peVtmpPtr = peVb; peVb = peVa; peVa = peVtmpPtr; + } else { + // B < A, Set A to A-B and continue halving A + SymCryptIntNeg( piT, piA ); + SymCryptModSub( pmMod, peVa, peVb, peVa, pbScratch, cbScratch ); + } + } + + // 1 = A = Va * X (mod Mod), so Va is the inverse of X + // Check computation that we can test in the debugger + SymCryptModMul( pmMod, peVa, peX, peVb, pbScratch, cbScratch ); + + // Actual answer + + // If the data is not public, multiply by the random blinding factor; otherwise copy the value + if( (flags & SYMCRYPT_FLAG_DATA_PUBLIC) == 0 ) + { + SymCryptModMul( 
pmMod, peVa, peR, peDst, pbScratch, cbScratch ); + } else + { + SymCryptModElementCopy( pmMod, peVa, peDst ); + } + +cleanup: + return scError; +} + + +//============================= +// Montgomery representation + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusInitMontgomeryInternal( + _Inout_ PSYMCRYPT_MODULUS pmMod, + UINT32 nUint32Used, // R = 2^{32 * this parameter} + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + // Scratch space is big enough for an nDigit+1 byte value + sufficient divmod scratch + PUINT32 pR2; + UINT32 cbR2; + UINT32 nDigits; + + PUINT32 modR2; + PUINT32 negDivisor; + + nDigits = pmMod->nDigits; + modR2 = (PUINT32)((PBYTE)&pmMod->Divisor + SymCryptFdefSizeofDivisorFromDigits( nDigits )); + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbScratch ); + + pmMod->tm.montgomery.Rsqr = modR2; + negDivisor = (PUINT32)((PBYTE)modR2 + (nDigits * SYMCRYPT_FDEF_DIGIT_SIZE)); + + // We pre-compute R^2 mod M + + pR2 = (PUINT32) pbScratch; + cbR2 = (2*nDigits + 1) * SYMCRYPT_FDEF_DIGIT_SIZE; + SYMCRYPT_ASSERT( cbScratch >= cbR2 ); + SYMCRYPT_ASSERT( cbScratch >= 2 * nUint32Used * sizeof(UINT32) ); + + // Set it to R^2 + SymCryptWipe( pR2, cbR2 ); + pR2[ 2 * nUint32Used ] = 1; + SymCryptFdefRawDivMod( pR2, 2*nDigits + 1, &pmMod->Divisor, NULL, modR2, pbScratch + cbR2, cbScratch - cbR2 ); + + SymCryptFdefRawNeg( SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int ), 0, negDivisor, nDigits ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusInitMontgomery( + _Inout_ PSYMCRYPT_MODULUS pmMod, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefModulusInitMontgomeryInternal( pmMod, pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32, pbScratch, cbScratch ); +} + +VOID +SymCryptFdefMontgomeryReduceC( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_updates_( 2 * pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 ) PUINT32 pSrc, + _Out_writes_( pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 ) PUINT32 pDst ) +{ + UINT32 nDigits = pmMod->nDigits; + 
UINT32 nWords = nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32; + PCUINT32 pMod = SYMCRYPT_FDEF_INT_PUINT32( &pmMod->Divisor.Int ); + + UINT32 hc = 0; + for( UINT32 i=0; i<nWords; i++ ) + { + UINT32 m = (UINT32)pmMod->inv64 * pSrc[0]; + UINT64 c = 0; + for( UINT32 j = 0; j < nWords; j++ ) + { + // Invariant: c < 2^32 + c += SYMCRYPT_MUL32x32TO64( pMod[j], m ); + c += pSrc[j]; + // There is no overflow on C because the max value is + // (2^32 - 1) * (2^32 - 1) + 2^32 - 1 + 2^32 - 1 = 2^64 - 1. + pSrc[j] = (UINT32) c; + c >>= 32; + } + c = c + pSrc[nWords] + hc; + pSrc[nWords] = (UINT32) c; + hc = c >> 32; + pSrc++; + } + SYMCRYPT_ASSERT( hc < 2 ); + + UINT32 d = SymCryptFdefRawSub( pSrc, pMod, pDst, nDigits ); + + SYMCRYPT_ASSERT( hc <= d ); // if hc = 1, then d = 1 is mandatory + + SymCryptFdefMaskedCopy( (PCBYTE) pSrc, (PBYTE) pDst, nDigits, hc - (hc | d) ); // copy only if hc=0, d=1 +} + +VOID +SymCryptFdefMontgomeryReduce( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_updates_( 2 * pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 ) PUINT32 pSrc, + _Out_writes_( pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32 ) PUINT32 pDst ) +{ +#if SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_MULX ) ) + { + SymCryptFdefMontgomeryReduceMulx( pmMod, pSrc, pDst ); + } else { + SymCryptFdefMontgomeryReduceAsm( pmMod, pSrc, pDst ); + } +#elif SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_ARM + SymCryptFdefMontgomeryReduceAsm( pmMod, pSrc, pDst ); +#else + SymCryptFdefMontgomeryReduceC( pmMod, pSrc, pDst ); +#endif +} + + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetPostMontgomery( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + // Montgomery representation for X is R*X mod M where R = 2^<nDigits * bits-per-digit> + // Montgomery reduction performs an implicit division by R + // This function converts to the internal representation by multiplying by R^2 mod M and 
then performing a Montgomery reduction + UINT32 nDigits = pmMod->nDigits; + + // dcl - this should not incur significant cost, consider checking always + SYMCRYPT_ASSERT( cbScratch >= nDigits * 2 * SYMCRYPT_FDEF_DIGIT_SIZE ); + UNREFERENCED_PARAMETER( cbScratch ); + + SymCryptFdefRawMul( &peObj->d.uint32[0], nDigits, pmMod->tm.montgomery.Rsqr, nDigits, (PUINT32) pbScratch ); + SymCryptFdefMontgomeryReduce( pmMod, (PUINT32) pbScratch, &peObj->d.uint32[0] ); +} + +PCUINT32 +SYMCRYPT_CALL +SymCryptFdefModPreGetMontgomery( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PUINT32 pTmp = (PUINT32) pbScratch; + UINT32 nDigits = pmMod->nDigits; + + // dcl - this should not incur significant cost, consider checking always + SYMCRYPT_ASSERT( cbScratch >= nDigits * 2 * SYMCRYPT_FDEF_DIGIT_SIZE ); + UNREFERENCED_PARAMETER( cbScratch ); + + memcpy( pTmp, &peObj->d.uint32[0], nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + SymCryptWipe( pTmp + nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32, nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + SymCryptFdefMontgomeryReduce( pmMod, pTmp, pTmp ); + + return pTmp; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusCopyFixupMontgomery( + _In_ PCSYMCRYPT_MODULUS pmSrc, + _Out_ PSYMCRYPT_MODULUS pmDst ) +{ + // We only have to fix up the Montgomery-specific stuff here + // dcl - not sure I understand why you pass pmSrc here + UNREFERENCED_PARAMETER( pmSrc ); + pmDst->tm.montgomery.Rsqr = (PUINT32)((PBYTE)&pmDst->Divisor + SymCryptFdefSizeofDivisorFromDigits( pmDst->nDigits )); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomery( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + // dcl - missing assert? 
+ UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= 2 * nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + + SymCryptFdefRawMul( &peSrc1->d.uint32[0], nDigits, &peSrc2->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduce( pmMod, pTmp, &peDst->d.uint32[0] ); +} + +#if 0 && SYMCRYPT_CPU_AMD64 +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomeryMulx( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= 2 * nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + + SymCryptFdefRawMulMulx( &peSrc1->d.uint32[0], nDigits, &peSrc2->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduceMulx( pmMod, pTmp, &peDst->d.uint32[0] ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomeryMulx1024( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= 2 * nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + + SymCryptFdefRawMulMulx1024( &peSrc1->d.uint32[0], &peSrc2->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduceMulx1024( pmMod, pTmp, &peDst->d.uint32[0] ); +} +#endif + + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomery( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= 2 * nDigits * 
SYMCRYPT_FDEF_DIGIT_SIZE ); + + SymCryptFdefRawSquare( &peSrc->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduce( pmMod, pTmp, &peDst->d.uint32[0] ); +} + + +#if 0 && SYMCRYPT_CPU_AMD64 +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomeryMulx( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= 2 * nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + + SymCryptFdefRawSquareMulx( &peSrc->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduceMulx( pmMod, pTmp, &peDst->d.uint32[0] ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomeryMulx1024( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= 2 * nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + + SymCryptFdefRawSquareMulx1024( &peSrc->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduceMulx1024( pmMod, pTmp, &peDst->d.uint32[0] ); +} +#endif + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModInvMontgomery( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 nDigits = pmMod->nDigits; + UINT32 nBytes = nDigits * SYMCRYPT_FDEF_DIGIT_SIZE; + PUINT32 pTmp = (PUINT32) pbScratch; + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pTmp ); + + // + // We have R*X; we first apply the montgomery reduction twice to get X/R, and then invert that + // using the generic inversion to get R/X. 
+ // + SYMCRYPT_ASSERT( cbScratch >= 2 * nBytes ); + memcpy( pTmp, &peSrc->d.uint32[0], nBytes ); + + SymCryptWipe( (PBYTE)pTmp + nBytes, nBytes ); + SymCryptFdefMontgomeryReduce( pmMod, pTmp, pTmp ); + + SymCryptWipe( (PBYTE)pTmp + nBytes, nBytes ); + SymCryptFdefMontgomeryReduce( pmMod, pTmp, &peDst->d.uint32[0] ); + + scError = SymCryptFdefModInvGeneric( pmMod, peDst, peDst, flags, pbScratch, cbScratch ); + + return scError; +} + +#if 0 && SYMCRYPT_CPU_AMD64 + +//===================================== +// 256-bit Montgomery modulus code +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModInvMontgomery256( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 nBytes = 32; + PUINT32 pTmp = (PUINT32) pbScratch; + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pTmp ); + + // + // We have R*X; we first apply the montgomery reduction twice to get X/R, and then invert that + // using the generic inversion to get R/X. 
+ // + SYMCRYPT_ASSERT( cbScratch >= 2 * nBytes ); + memcpy( pTmp, &peSrc->d.uint32[0], nBytes ); + + SymCryptWipe( (PBYTE)pTmp + nBytes, nBytes ); + SymCryptFdefMontgomeryReduce256Asm( pmMod, pTmp, pTmp ); + + SymCryptWipe( (PBYTE)pTmp + nBytes, nBytes ); + SymCryptFdefMontgomeryReduce256Asm( pmMod, pTmp, &peDst->d.uint32[0] ); + + scError = SymCryptFdefModInvGeneric( pmMod, peDst, peDst, flags, pbScratch, cbScratch ); + + return scError; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetPostMontgomeryMulx256( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + // Montgomery representation for X is R*X mod M where R = 2^<nDigits * bits-per-digit> + // Montgomery reduction performs an implicit division by R + // This function converts to the internal representation by multiplying by R^2 mod M and then performing a Montgomery reduction + UINT32 nDigits = pmMod->nDigits; + + SYMCRYPT_ASSERT( cbScratch >= nDigits * 2 * SYMCRYPT_FDEF_DIGIT_SIZE ); + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); + UNREFERENCED_PARAMETER( nDigits ); + + SymCryptFdefModMulMontgomeryMulx256Asm( pmMod, (PSYMCRYPT_MODELEMENT) pmMod->tm.montgomery.Rsqr, peObj, peObj ); +} + +PCUINT32 +SYMCRYPT_CALL +SymCryptFdefModPreGetMontgomery256( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + PUINT32 pTmp = (PUINT32) pbScratch; + UINT32 nDigits = 1; + + SYMCRYPT_ASSERT( cbScratch >= nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + UNREFERENCED_PARAMETER( cbScratch ); + + memcpy( pTmp, &peObj->d.uint32[0], nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ); + SymCryptFdefMontgomeryReduce256Asm( pmMod, pTmp, pTmp ); + + // This gives the right result, but relies on peObj having zeroed upper half + // on AMD64 when digits are 512 bits. This should be true - check in a CHKed build. 
+ for( UINT32 i=8; i<16; ++i ) + { + SYMCRYPT_ASSERT( pTmp[i] == 0 ); + } + + // Wipe the extra bytes + // SymCryptWipeKnownSize( pTmp + (SYMCRYPT_FDEF_DIGIT_NUINT32 / 2), 32 ); + + return pTmp; +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusInitMontgomery256( + _Inout_ PSYMCRYPT_MODULUS pmMod, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SymCryptFdefModulusInitMontgomeryInternal( pmMod, 8, pbScratch, cbScratch ); +} + +//===================================== +// 384-bit Montgomery modulus code +// + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetPostMontgomeryMulxP384( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + // Montgomery representation for X is R*X mod M where R = 2^<nDigits * bits-per-digit> + // Montgomery reduction performs an implicit division by R + // This function converts to the internal representation by multiplying by R^2 mod M and then performing a Montgomery reduction + UINT32 nDigits = pmMod->nDigits; + + SYMCRYPT_ASSERT( cbScratch >= nDigits * 2 * SYMCRYPT_FDEF_DIGIT_SIZE ); + UNREFERENCED_PARAMETER( pbScratch ); + UNREFERENCED_PARAMETER( cbScratch ); + UNREFERENCED_PARAMETER( nDigits ); + + SymCryptFdefModMulMontgomeryMulxP384Asm( pmMod, (PSYMCRYPT_MODELEMENT) pmMod->tm.montgomery.Rsqr, peObj, peObj ); +} + +#if 0 +//===================================== +// 512-bit Montgomery modulus code +// + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomery512( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + SYMCRYPT_ASSERT( cbScratch >= nDigits * 2 * SYMCRYPT_FDEF_DIGIT_SIZE ); + UNREFERENCED_PARAMETER( cbScratch ); + + SymCryptFdefRawMul512Asm( &peSrc1->d.uint32[0], 
&peSrc2->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduce512Asm( pmMod, pTmp, &peDst->d.uint32[0] ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomery512( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + SYMCRYPT_ASSERT( cbScratch >= nDigits * 2 * SYMCRYPT_FDEF_DIGIT_SIZE ); + UNREFERENCED_PARAMETER( cbScratch ); + + SymCryptFdefRawSquare512Asm( &peSrc->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduce512Asm( pmMod, pTmp, &peDst->d.uint32[0] ); +} + +//===================================== +// 1024-bit Montgomery modulus code +// + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomery1024( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + SYMCRYPT_ASSERT( cbScratch >= nDigits * 2 * SYMCRYPT_FDEF_DIGIT_SIZE ); + UNREFERENCED_PARAMETER( cbScratch ); + + SymCryptFdefRawMul1024Asm( &peSrc1->d.uint32[0], &peSrc2->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduce1024Asm( pmMod, pTmp, &peDst->d.uint32[0] ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomery1024( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = pmMod->nDigits; + PUINT32 pTmp = (PUINT32) pbScratch; + + SYMCRYPT_ASSERT( cbScratch >= nDigits * 2 * SYMCRYPT_FDEF_DIGIT_SIZE ); + UNREFERENCED_PARAMETER( cbScratch ); + + SymCryptFdefRawSquare1024Asm( &peSrc->d.uint32[0], nDigits, pTmp ); + SymCryptFdefMontgomeryReduce1024Asm( pmMod, pTmp, &peDst->d.uint32[0] ); +} +#endif + 
+#endif + +/* Wine hack: asm not supported yet, so the Asm and Mulx entry points below simply forward to the portable C / generic routines */ + +VOID +SYMCRYPT_CALL +SymCryptFdefMontgomeryReduceAsm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ) +{ + SymCryptFdefMontgomeryReduceC( pmMod, pSrc, pDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModDivSmallPow2Mulx( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _In_range_(1, NATIVE_BITS) UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst ) +{ + SymCryptFdefModDivSmallPow2Generic( pmMod, peSrc, exp, peDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefMontgomeryReduceMulx( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ) +{ + SymCryptFdefMontgomeryReduceC( pmMod, pSrc, pDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptFdefModDivSmallPow2Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _In_range_(1, NATIVE_BITS) UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst ) +{ + SymCryptFdefModDivSmallPow2Generic( pmMod, peSrc, exp, peDst ); +} diff --git a/libs/symcrypt/lib/gcm.c b/libs/symcrypt/lib/gcm.c new file mode 100644 index 00000000000..a3a66ddea2a --- /dev/null +++ b/libs/symcrypt/lib/gcm.c @@ -0,0 +1,902 @@ +// +// gcm.c Implementation of the GCM block cipher mode +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#define GCM_MIN_NONCE_SIZE (1) +#define GCM_MIN_TAG_SIZE (12) +#define GCM_MAX_TAG_SIZE (16) + + +// Validates the parameters of a GCM operation: the cipher block size, the nonce length, the +// associated-data length, the data length and the tag length, in that order. +// Returns SYMCRYPT_NO_ERROR when every check passes, otherwise the SYMCRYPT_WRONG_* code of the +// first check that fails. +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptGcmValidateParameters( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ SIZE_T cbNonce, + _In_ UINT64 cbAssociatedData, + _In_ UINT64 cbData, + _In_ SIZE_T cbTag ) +{ + if( pBlockCipher->blockSize != SYMCRYPT_GCM_BLOCK_SIZE ) + { + return SYMCRYPT_WRONG_BLOCK_SIZE; + } + + // + // SP800-38D specifies that the nonce must be at least one bit, but we operate on bytes, + // so the minimum is one byte.
+ // + if( cbNonce < GCM_MIN_NONCE_SIZE ) + { + return SYMCRYPT_WRONG_NONCE_SIZE; + } + + // + // cbAssociatedData is limited to <2^61 bytes + // + if( (cbAssociatedData >> 61) > 0 ) + { + return SYMCRYPT_WRONG_DATA_SIZE; + } + + // + // per SP800-38D cbData is limited to 2^36 - 32 bytes + // + if( cbData > SYMCRYPT_GCM_MAX_DATA_SIZE ) + { + return SYMCRYPT_WRONG_DATA_SIZE; + } + + if( cbTag < GCM_MIN_TAG_SIZE || cbTag > GCM_MAX_TAG_SIZE ) + { + return SYMCRYPT_WRONG_TAG_SIZE; + } + + return SYMCRYPT_NO_ERROR; +} + + + +// Feeds cbData bytes of MAC input into GHASH. Bytes are first used to top up a partially filled +// macBlock, whole blocks are then hashed straight from pbData, and any tail shorter than a block +// is buffered in macBlock (with its length in bytesInMacBlock) for a later call. +VOID +SYMCRYPT_CALL +SymCryptGcmAddMacData( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_opt_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SIZE_T bytesToProcess; + // Step 1: complete a previously buffered partial block, hashing it once it is full. + if( pState->bytesInMacBlock > 0 ) + { + bytesToProcess = SYMCRYPT_MIN( cbData, SYMCRYPT_GCM_BLOCK_SIZE - pState->bytesInMacBlock ); + memcpy( &pState->macBlock[pState->bytesInMacBlock], pbData, bytesToProcess ); + pbData += bytesToProcess; + cbData -= bytesToProcess; + pState->bytesInMacBlock += bytesToProcess; + + if( pState->bytesInMacBlock == SYMCRYPT_GCM_BLOCK_SIZE ) + { + SymCryptGHashAppendData( &pState->pKey->ghashKey, + &pState->ghashState, + &pState->macBlock[0], + SYMCRYPT_GCM_BLOCK_SIZE ); + pState->bytesInMacBlock = 0; + } + } + + // Step 2: hash all remaining whole blocks directly from the caller's buffer. + if( cbData >= SYMCRYPT_GCM_BLOCK_SIZE ) + { + bytesToProcess = cbData & SYMCRYPT_GCM_BLOCK_ROUND_MASK; + + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, pbData, bytesToProcess ); + + pbData += bytesToProcess; + cbData -= bytesToProcess; + } + + // Step 3: buffer whatever is left (less than one block). + if( cbData > 0 ) + { + memcpy( &pState->macBlock[0], pbData, cbData ); + pState->bytesInMacBlock = cbData; + } +} + + + +// Zero-pads a partially filled macBlock to a full block and hashes it; does nothing when the +// block buffer is empty. +VOID +SYMCRYPT_CALL +SymCryptGcmPadMacData( _Inout_ PSYMCRYPT_GCM_STATE pState ) +{ + SIZE_T nBytes; + // + // Pad the MAC data with zeroes until we hit the block size.
+ // + nBytes = pState->bytesInMacBlock; + if( nBytes > 0 ) + { + SymCryptWipe( &pState->macBlock[nBytes], SYMCRYPT_GCM_BLOCK_SIZE - nBytes ); + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &pState->macBlock[0], SYMCRYPT_GCM_BLOCK_SIZE ); + pState->bytesInMacBlock = 0; + } +} + + + +VOID +SYMCRYPT_CALL +SymCryptGcmEncryptDecryptPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SIZE_T bytesToProcess; + SIZE_T bytesUsedInKeyStreamBuffer; + + bytesUsedInKeyStreamBuffer = (SIZE_T) (pState->cbData & SYMCRYPT_GCM_BLOCK_MOD_MASK); + + // + // We update pState->cbData once before we modify cbData. + // pState->cbData is not used in the rest of this function + // + SYMCRYPT_ASSERT( pState->cbData + cbData <= SYMCRYPT_GCM_MAX_DATA_SIZE ); + pState->cbData += cbData; + + if( bytesUsedInKeyStreamBuffer != 0 ) + { + bytesToProcess = SYMCRYPT_MIN( cbData, SYMCRYPT_GCM_BLOCK_SIZE - bytesUsedInKeyStreamBuffer ); + SymCryptXorBytes( pbSrc, &pState->keystreamBlock[bytesUsedInKeyStreamBuffer], pbDst, bytesToProcess ); + pbSrc += bytesToProcess; + pbDst += bytesToProcess; + cbData -= bytesToProcess; + + // + // If there are bytes left in the key stream buffer, then cbData == 0 and we're done. 
+ // If we used up all the bytes, then we are fine, no need to compute the next key stream block + // + } + + if( cbData >= SYMCRYPT_GCM_BLOCK_SIZE ) + { + bytesToProcess = cbData & SYMCRYPT_GCM_BLOCK_ROUND_MASK; + + SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE ); + SymCryptCtrMsb32( pState->pKey->pBlockCipher, + &pState->pKey->blockcipherKey, + &pState->counterBlock[0], + pbSrc, + pbDst, + bytesToProcess ); + + pbSrc += bytesToProcess; + pbDst += bytesToProcess; + cbData -= bytesToProcess; + } + + if( cbData > 0 ) + { + SymCryptWipeKnownSize( &pState->keystreamBlock[0], SYMCRYPT_GCM_BLOCK_SIZE ); + + SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE ); + SymCryptCtrMsb32( pState->pKey->pBlockCipher, + &pState->pKey->blockcipherKey, + &pState->counterBlock[0], + &pState->keystreamBlock[0], + &pState->keystreamBlock[0], + SYMCRYPT_GCM_BLOCK_SIZE ); + + SymCryptXorBytes( &pState->keystreamBlock[0], pbSrc, pbDst, cbData ); + + // + // pState->cbData contains the data length after this call already, so it knows how many + // bytes are left in the keystream block + // + } + +} + +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptGcmResetCounterBlock( + _Inout_ PSYMCRYPT_GCM_STATE pState ) +{ + // Computing the tag for GCM requires invoking the GCTR function with the pre-counter + // block which was computed when the nonce was set. Historically, we only supported 12-byte + // nonces, so we could trivially reset the counter block by just setting the last 4 bytes to + // (DWORD) 1. With support for larger IVs, the pre-counter block is computed from a GHash of + // the nonce, and we don't store the value. 
Adding a field in the GCM struct to store the value + // would be ABI-breaking, so instead we can recompute the value by decrementing the last 32 bits + // of the counter block by the number of blocks that have been processed (since the counter is + // incremented once per block), plus one for the initial increment. + UINT32 preCounter32 = SYMCRYPT_LOAD_MSBFIRST32(&pState->counterBlock[12]) - + (UINT32) ((pState->cbData + SYMCRYPT_GCM_BLOCK_SIZE - 1) / SYMCRYPT_GCM_BLOCK_SIZE) - 1; + + SYMCRYPT_STORE_MSBFIRST32(&pState->counterBlock[12], preCounter32); +} + +// Finishes the GHASH computation over any buffered MAC bytes plus the length block, rewinds the +// counter block to its pre-counter value, and writes the GHASH result encrypted under that +// counter block to pbTag. Always writes SYMCRYPT_GCM_BLOCK_SIZE bytes. +VOID +SYMCRYPT_CALL +SymCryptGcmComputeTag( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _Out_writes_( SYMCRYPT_GCM_BLOCK_SIZE ) PBYTE pbTag ) +{ + SYMCRYPT_ALIGN BYTE buf[2 * SYMCRYPT_GCM_BLOCK_SIZE]; + + // The second half of buf is the length block: bit lengths of the auth data and of the data. + SYMCRYPT_STORE_MSBFIRST64( &buf[16], pState->cbAuthData * 8 ); + SYMCRYPT_STORE_MSBFIRST64( &buf[24], pState->cbData * 8 ); + + if( pState->bytesInMacBlock > 0 ) + { + // + // Pad the MAC data with zeroes until we hit the block size + // + SymCryptWipeKnownSize( &buf[0], SYMCRYPT_GCM_BLOCK_SIZE ); + memcpy( buf, &pState->macBlock[0], pState->bytesInMacBlock ); + + // Hash the padded final MAC block and the length block in a single call. + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &buf[0], 2 * SYMCRYPT_GCM_BLOCK_SIZE ); + } + else + { + // No buffered MAC bytes: only the length block remains to be hashed. + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &buf[16], SYMCRYPT_GCM_BLOCK_SIZE ); + } + + SymCryptGcmResetCounterBlock(pState); + + // + // Convert the GHash state to an array of bytes + // + SYMCRYPT_STORE_MSBFIRST64( &buf[0], pState->ghashState.ull[1] ); + SYMCRYPT_STORE_MSBFIRST64( &buf[8], pState->ghashState.ull[0] ); + + SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE ); + SymCryptCtrMsb32( pState->pKey->pBlockCipher, + &pState->pKey->blockcipherKey, + &pState->counterBlock[0], + buf, + pbTag, + SYMCRYPT_GCM_BLOCK_SIZE ); + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); +} + +// Expands pbKey for the block cipher, keeps a copy of the raw key, and derives the GHASH key from +// the encryption of an all-zero block. Returns SYMCRYPT_WRONG_KEY_SIZE when cbKey exceeds +// SYMCRYPT_GCM_MAX_KEY_SIZE, or the error reported by the cipher's key expansion. +SYMCRYPT_NOINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptGcmExpandKey( + _Out_
PSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey ) +{ + SYMCRYPT_ALIGN BYTE H[SYMCRYPT_GCM_BLOCK_SIZE]; + SYMCRYPT_ERROR status = SYMCRYPT_NO_ERROR; + + if( cbKey > SYMCRYPT_GCM_MAX_KEY_SIZE ) + { + status = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + // + // Perform the Block cipher key expansion first + // + pExpandedKey->pBlockCipher = pBlockCipher; + status = pBlockCipher->expandKeyFunc( &pExpandedKey->blockcipherKey, pbKey, cbKey ); + + if( status != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // + // We keep a copy of the key to make it easy to + // implement the SymCryptGcmKeyCopy function + // + pExpandedKey->cbKey = cbKey; + memcpy( &pExpandedKey->abKey[0], pbKey, cbKey ); + + // + // Compute H and the GHASH expanded key + // H is the encryption of an all-zero block under the freshly expanded key. + // + SymCryptWipeKnownSize( H, sizeof( H ) ); + pBlockCipher->encryptFunc( &pExpandedKey->blockcipherKey, H, H ); + + + SymCryptGHashExpandKey( &pExpandedKey->ghashKey, H ); + + + SYMCRYPT_SET_MAGIC( pExpandedKey ); + + // H is key material; clear it from the stack before returning. + SymCryptWipeKnownSize( H, sizeof( H ) ); + +cleanup: + + return status; +} + +// Copies an expanded key by re-running the key expansion on the raw key bytes kept in pSrc->abKey. +// The expansion is asserted (CHKed builds) to succeed. +VOID +SYMCRYPT_CALL +SymCryptGcmKeyCopy( _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pSrc, _Out_ PSYMCRYPT_GCM_EXPANDED_KEY pDst ) +{ + SYMCRYPT_ERROR status; + + SYMCRYPT_CHECK_MAGIC( pSrc ); + + status = SymCryptGcmExpandKey( pDst, pSrc->pBlockCipher, &pSrc->abKey[0], pSrc->cbKey ); + SYMCRYPT_ASSERT( status == SYMCRYPT_NO_ERROR ); +} + +// Derives the counter block from the nonce. A 12-byte nonce is used directly with a 32-bit +// counter of 1; any other length is run through GHASH. The 32-bit counter is then incremented +// once, so the state is ready to produce key stream for the data. +VOID +SYMCRYPT_CALL +SymCryptGcmSetNonce( + _Out_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce ) +{ + SYMCRYPT_ASSERT( cbNonce >= GCM_MIN_NONCE_SIZE ); + + // Handle the nonce depending on its size, as specified in NIST SP800-38D + if( cbNonce == 12 ) + { + // If len(nonce) = 96 bits (12 bytes), pre-counter block = nonce || (DWORD) 1 + memcpy( &pState->counterBlock[0], pbNonce, cbNonce ); + SymCryptWipeKnownSize( &pState->counterBlock[12], 4 ); + pState->counterBlock[15] = 1; + } + else
+ { + // If len(nonce) != 96 bits (12 bytes), + // pre-counter block = GHASH(nonce padded to a multiple of 128 bits || (QWORD) len(nonce)) + BYTE buf[SYMCRYPT_GF128_BLOCK_SIZE]; + SIZE_T cbNonceRemainder = cbNonce & (SYMCRYPT_GF128_BLOCK_SIZE - 1); + + // Process all full blocks of the nonce, i.e. all nonce bytes up to a multiple of + // SYMCRYPT_GF128_BLOCK_SIZE. SymCryptGHashAppendData ignores additional data that are + // not a multiple of the block size. We will handle any such remaining data below. + // (This also works if the nonce is less than the block size.) + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, pbNonce, cbNonce ); + + // If the nonce length is not a multiple of SYMCRYPT_GF128_BLOCK_SIZE, we need to pad any + // remaining data to a multiple of the block size. + if(cbNonceRemainder > 0) + { + SymCryptWipeKnownSize( buf, sizeof(buf) ); + memcpy(buf, pbNonce + cbNonce - cbNonceRemainder, cbNonceRemainder); + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, buf, sizeof(buf) ); + } + + // Now we append the length of the nonce in bits. We take the length as a 64-bit integer, + // but it too must be padded to 128 bits for use in GHASH. + SymCryptWipeKnownSize( buf, 8 ); + SYMCRYPT_STORE_MSBFIRST64( &buf[8], cbNonce * 8 ); + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, buf, sizeof(buf) ); + + SymCryptGHashResult( &pState->ghashState, pState->counterBlock ); + SymCryptWipeKnownSize( &pState->ghashState, sizeof( pState->ghashState ) ); + } + + // Increment the last 32 bits of the counter. We'll recalculate the pre-counter block later + // when computing the tag. 
+ SYMCRYPT_STORE_MSBFIRST32( + &pState->counterBlock[12], + 1 + SYMCRYPT_LOAD_MSBFIRST32( &pState->counterBlock[12] ) ); +} + +// Initializes a GCM state for pExpandedKey: clears the length counters, the MAC buffer count and +// the GHASH state, then derives the counter block from the nonce. +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptGcmInit( + _Out_ PSYMCRYPT_GCM_STATE pState, + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce ) +{ + UNREFERENCED_PARAMETER( cbNonce ); // It is used in an ASSERT, but only in CHKed builds. + + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + pState->pKey = pExpandedKey; + pState->cbData = 0; + pState->cbAuthData = 0; + pState->bytesInMacBlock = 0; + SymCryptWipeKnownSize( &pState->ghashState, sizeof( pState->ghashState ) ); + + SymCryptGcmSetNonce(pState, pbNonce, cbNonce); + + SYMCRYPT_SET_MAGIC( pState ); +} + + +// Copies a GCM state by structure assignment. When pExpandedKeyCopy is non-NULL the copy is +// pointed at that key; otherwise it keeps sharing the source state's key. +VOID +SYMCRYPT_CALL +SymCryptGcmStateCopy( + _In_ PCSYMCRYPT_GCM_STATE pSrc, + _In_opt_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKeyCopy, + _Out_ PSYMCRYPT_GCM_STATE pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + + *pDst = *pSrc; + if( pExpandedKeyCopy != NULL ) + { + pDst->pKey = pExpandedKeyCopy; + } + + SYMCRYPT_SET_MAGIC( pDst ); +} + + +// Adds associated (authenticated-only) data to the MAC and to the running auth-data length. +// Asserts in CHKed builds that no data has been encrypted or decrypted yet (pState->cbData == 0). +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptGcmAuthPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_opt_( cbData ) PCBYTE pbAuthData, + SIZE_T cbData ) +{ + SYMCRYPT_CHECK_MAGIC( pState ); + SYMCRYPT_ASSERT( pState->cbData == 0 ); + + SymCryptGcmAddMacData( pState, pbAuthData, cbData ); + pState->cbAuthData += cbData; +} + +// Encrypts cbData bytes from pbSrc into pbDst. On the first call the buffered auth data is padded +// to a block boundary; the work is then done by the block cipher's gcmEncryptPartFunc when one is +// provided, otherwise by SymCryptGcmEncryptPartTwoPass. +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptGcmEncryptPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + if( pState->cbData == 0 ) + { + // + // This is the first actual encryption data, pad the Auth data with zeroes if needed.
+ // + SymCryptGcmPadMacData( pState ); + } + + if ( pState->pKey->pBlockCipher->gcmEncryptPartFunc != NULL ) + { + // + // Use optimized implementation if available + // + (*pState->pKey->pBlockCipher->gcmEncryptPartFunc) ( pState, pbSrc, pbDst, cbData ); + SYMCRYPT_ASSERT( pState->bytesInMacBlock <= 15 ); + } + else + { + SymCryptGcmEncryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } +} + +// Generic encryption path: CTR-encrypts pbSrc into pbDst, then MACs the ciphertext just written. +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptGcmEncryptPartTwoPass( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + // + // Do the actual encryption + // + SymCryptGcmEncryptDecryptPart( pState, pbSrc, pbDst, cbData ); + + // + // We break the read-once/write once rule here by reading the pbDst data back. + // In this particular situation this is safe, and avoiding it is expensive as it + // requires an extra copy and an extra memory buffer. + // The first write exposes the GCM key stream, independent of the underlying data that + // we are processing. From an attacking point of view we can think of this as literally + // handing over the key stream. So encryption consists of two steps: + // - hand over the key stream + // - MAC some ciphertext + // In this view (which has equivalent security properties to GCM) it obviously doesn't + // matter that we read pbDst back. + // + + SymCryptGcmAddMacData( pState, pbDst, cbData ); +} + + +// Decrypts cbData bytes from pbSrc into pbDst. On the first call the buffered auth data is padded +// to a block boundary; the work is then done by the block cipher's gcmDecryptPartFunc when one is +// provided, otherwise by SymCryptGcmDecryptPartTwoPass. +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptGcmDecryptPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + if( pState->cbData == 0 ) + { + // + // This is the first actual decryption data, pad the Auth data with zeroes if needed.
+ // + SymCryptGcmPadMacData( pState ); + } + + if ( pState->pKey->pBlockCipher->gcmDecryptPartFunc != NULL ) + { + // + // Use optimized implementation if available + // + (*pState->pKey->pBlockCipher->gcmDecryptPartFunc) ( pState, pbSrc, pbDst, cbData ); + SYMCRYPT_ASSERT( pState->bytesInMacBlock <= 15 ); + } + else + { + SymCryptGcmDecryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } +} + +// Generic decryption path: MACs the ciphertext in pbSrc first, then CTR-decrypts it into pbDst. +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptGcmDecryptPartTwoPass( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SymCryptGcmAddMacData( pState, pbSrc, cbData ); + + // + // Do the actual decryption + // This violates the read-once rule, but it is safe for the same reasons as above + // in the encryption case. + // + + SymCryptGcmEncryptDecryptPart( pState, pbSrc, pbDst, cbData ); +} + + +// Computes the full-block tag, copies its first cbTag bytes to pbTag, and wipes both the +// temporary buffer and the whole state. +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptGcmEncryptFinal( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag ) +{ + SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_GCM_BLOCK_SIZE]; + + SYMCRYPT_ASSERT( cbTag >= GCM_MIN_TAG_SIZE && cbTag <= GCM_MAX_TAG_SIZE ); + + SymCryptGcmComputeTag( pState, &buf[0] ); + memcpy( pbTag, buf, cbTag ); + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + + SymCryptWipeKnownSize( pState, sizeof( *pState ) ); + SYMCRYPT_ASSERT( pState->bytesInMacBlock == 0 ); +} + +// Computes the full-block tag and compares its first cbTag bytes with pbTag using SymCryptEqual. +// Returns SYMCRYPT_NO_ERROR on a match and SYMCRYPT_AUTHENTICATION_FAILURE otherwise; the +// temporary buffer and the state are wiped in both cases. +SYMCRYPT_NOINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptGcmDecryptFinal( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ) +{ + SYMCRYPT_ALIGN BYTE buf[SYMCRYPT_GCM_BLOCK_SIZE]; + SYMCRYPT_ERROR status; + + SYMCRYPT_ASSERT( cbTag >= GCM_MIN_TAG_SIZE && cbTag <= GCM_MAX_TAG_SIZE ); + + SymCryptGcmComputeTag( pState, &buf[0] ); + + if( !SymCryptEqual( pbTag, buf, cbTag ) ) + { + status = SYMCRYPT_AUTHENTICATION_FAILURE; + } + else + { + status = SYMCRYPT_NO_ERROR; + } + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + +
SymCryptWipeKnownSize( pState, sizeof( *pState ) ); + SYMCRYPT_ASSERT( pState->bytesInMacBlock == 0 ); + + return status; +} + + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptGcmEncrypt( + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag ) +{ + SYMCRYPT_ALIGN BYTE buf[2 * SYMCRYPT_GCM_BLOCK_SIZE]; + SYMCRYPT_GCM_STATE state; + PSYMCRYPT_GCM_STATE pState = &state; + + // SymCryptGcmInit( &state, pExpandedKey, pbNonce, cbNonce ); + UNREFERENCED_PARAMETER( cbNonce ); // It is used in an ASSERT, but only in CHKed builds. + + SYMCRYPT_ASSERT( cbNonce >= GCM_MIN_NONCE_SIZE ); + SYMCRYPT_ASSERT( cbTag >= GCM_MIN_TAG_SIZE && cbTag <= GCM_MAX_TAG_SIZE ); + + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + pState->pKey = pExpandedKey; + pState->cbData = 0; + pState->cbAuthData = 0; + pState->bytesInMacBlock = 0; + SymCryptWipeKnownSize( &pState->ghashState, sizeof( pState->ghashState ) ); + + SymCryptGcmSetNonce( pState, pbNonce, cbNonce ); + + // SymCryptGcmAuthPart( &state, pbAuthData, cbAuthData ); + pState->cbAuthData += cbAuthData; + if( cbAuthData >= SYMCRYPT_GCM_BLOCK_SIZE ) + { + SIZE_T bytesToDo = cbAuthData & SYMCRYPT_GCM_BLOCK_ROUND_MASK; + + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, pbAuthData, bytesToDo ); + + pbAuthData += bytesToDo; + cbAuthData -= bytesToDo; + } + + if( cbAuthData > 0 ) + { + // + // Pad the MAC data with zeroes until we hit the block size. 
+ // + SymCryptWipeKnownSize( &pState->macBlock[0], SYMCRYPT_GCM_BLOCK_SIZE ); + memcpy( &pState->macBlock[0], pbAuthData, cbAuthData ); + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &pState->macBlock[0], SYMCRYPT_GCM_BLOCK_SIZE ); + } + + // SymCryptGcmEncryptPart( &state, pbSrc, pbDst, cbData ); + if ( pState->pKey->pBlockCipher->gcmEncryptPartFunc != NULL ) + { + // + // Use optimized implementation if available + // + (*pState->pKey->pBlockCipher->gcmEncryptPartFunc) ( pState, pbSrc, pbDst, cbData ); + } + else + { + SymCryptGcmEncryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } + + // SymCryptGcmEncryptFinal( &state, pbTag, cbTag ); + SYMCRYPT_STORE_MSBFIRST64( &buf[16], pState->cbAuthData * 8 ); + SYMCRYPT_STORE_MSBFIRST64( &buf[24], pState->cbData * 8 ); + + if( pState->bytesInMacBlock > 0 ) + { + // + // Pad the MAC data with zeroes until we hit the block size + // + SymCryptWipeKnownSize( &buf[0], SYMCRYPT_GCM_BLOCK_SIZE ); + memcpy( buf, &pState->macBlock[0], pState->bytesInMacBlock ); + + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &buf[0], 2 * SYMCRYPT_GCM_BLOCK_SIZE ); + } + else + { + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &buf[16], SYMCRYPT_GCM_BLOCK_SIZE ); + } + + // Reset the counter block prior to computing the tag + SymCryptGcmResetCounterBlock( pState ); + + // + // Convert the GHash state to an array of bytes + // + SYMCRYPT_STORE_MSBFIRST64( &buf[0], pState->ghashState.ull[1] ); + SYMCRYPT_STORE_MSBFIRST64( &buf[8], pState->ghashState.ull[0] ); + + SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE ); + SymCryptCtrMsb32( pState->pKey->pBlockCipher, + &pState->pKey->blockcipherKey, + &pState->counterBlock[0], + buf, + buf, + SYMCRYPT_GCM_BLOCK_SIZE ); + + memcpy( pbTag, buf, cbTag ); + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + SymCryptWipeKnownSize( pState, sizeof( *pState ) ); +} + + +SYMCRYPT_NOINLINE 
+SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptGcmDecrypt( + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_( cbNonce ) PCBYTE pbNonce, + SIZE_T cbNonce, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ) +{ + SYMCRYPT_ERROR status; + SYMCRYPT_ALIGN BYTE buf[2 * SYMCRYPT_GCM_BLOCK_SIZE]; + SYMCRYPT_GCM_STATE state; + PSYMCRYPT_GCM_STATE pState = &state; + + // SymCryptGcmInit( &state, pExpandedKey, pbNonce, cbNonce ); + UNREFERENCED_PARAMETER( cbNonce ); // It is used in an ASSERT, but only in CHKed builds. + + SYMCRYPT_ASSERT( cbNonce >= GCM_MIN_NONCE_SIZE ); + SYMCRYPT_ASSERT( cbTag >= GCM_MIN_TAG_SIZE && cbTag <= GCM_MAX_TAG_SIZE ); + + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + pState->pKey = pExpandedKey; + pState->cbData = 0; + pState->cbAuthData = 0; + pState->bytesInMacBlock = 0; + SymCryptWipeKnownSize( &pState->ghashState, sizeof( pState->ghashState ) ); + + SymCryptGcmSetNonce( pState, pbNonce, cbNonce ); + + // SymCryptGcmAuthPart( &state, pbAuthData, cbAuthData ); + pState->cbAuthData += cbAuthData; + if( cbAuthData >= SYMCRYPT_GCM_BLOCK_SIZE ) + { + SIZE_T bytesToDo = cbAuthData & SYMCRYPT_GCM_BLOCK_ROUND_MASK; + + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, pbAuthData, bytesToDo ); + + pbAuthData += bytesToDo; + cbAuthData -= bytesToDo; + } + + if( cbAuthData > 0 ) + { + // + // Pad the MAC data with zeroes until we hit the block size. 
+ // + SymCryptWipeKnownSize( &pState->macBlock[0], SYMCRYPT_GCM_BLOCK_SIZE ); + memcpy( &pState->macBlock[0], pbAuthData, cbAuthData ); + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &pState->macBlock[0], SYMCRYPT_GCM_BLOCK_SIZE ); + } + + // SymCryptGcmDecryptPart( &state, pbSrc, pbDst, cbData ); + if ( pState->pKey->pBlockCipher->gcmDecryptPartFunc != NULL ) + { + // + // Use optimized implementation if available + // + (*pState->pKey->pBlockCipher->gcmDecryptPartFunc) ( pState, pbSrc, pbDst, cbData ); + } + else + { + SymCryptGcmDecryptPartTwoPass( pState, pbSrc, pbDst, cbData ); + } + + //status = SymCryptGcmDecryptFinal( &state, pbTag, cbTag ); + SYMCRYPT_STORE_MSBFIRST64( &buf[16], pState->cbAuthData * 8 ); + SYMCRYPT_STORE_MSBFIRST64( &buf[24], pState->cbData * 8 ); + + if( pState->bytesInMacBlock > 0 ) + { + // + // Pad the MAC data with zeroes until we hit the block size + // + SymCryptWipeKnownSize( &buf[0], SYMCRYPT_GCM_BLOCK_SIZE ); + memcpy( buf, &pState->macBlock[0], pState->bytesInMacBlock ); + + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &buf[0], 2 * SYMCRYPT_GCM_BLOCK_SIZE ); + } + else + { + SymCryptGHashAppendData( &pState->pKey->ghashKey, &pState->ghashState, &buf[16], SYMCRYPT_GCM_BLOCK_SIZE ); + } + + SymCryptGcmResetCounterBlock( pState ); + + // + // Convert the GHash state to an array of bytes + // + SYMCRYPT_STORE_MSBFIRST64( &buf[0], pState->ghashState.ull[1] ); + SYMCRYPT_STORE_MSBFIRST64( &buf[8], pState->ghashState.ull[0] ); + + SYMCRYPT_ASSERT( pState->pKey->pBlockCipher->blockSize == SYMCRYPT_GCM_BLOCK_SIZE ); + SymCryptCtrMsb32( pState->pKey->pBlockCipher, + &pState->pKey->blockcipherKey, + &pState->counterBlock[0], + buf, + buf, + SYMCRYPT_GCM_BLOCK_SIZE ); + + if( !SymCryptEqual( pbTag, buf, cbTag ) ) + { + status = SYMCRYPT_AUTHENTICATION_FAILURE; + } + else + { + status = SYMCRYPT_NO_ERROR; + } + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + SymCryptWipeKnownSize( pState, 
sizeof( *pState ) ); + + if( status != SYMCRYPT_NO_ERROR ) + { + SymCryptWipe( pbDst, cbData ); + } + + return status; +} + + +// Expected output of the self-test encryption: 3 ciphertext bytes followed by the 16-byte tag. +static const BYTE SymCryptGcmSelftestResult[3 + SYMCRYPT_AES_BLOCK_SIZE ] = +{ + 0xa5, 0x4c, 0x60, + 0x80, 0xb0, 0x48, 0x6d, 0x03, 0x9f, 0xea, 0xc3, 0x3c, 0x28, 0x96, 0x3f, 0x99, 0x8a, 0x77, 0x43, +}; + +// Known-answer self-test. Encrypts the 3-byte SymCryptTestMsg3 with a 16-byte key and a 12-byte +// nonce taken from SymCryptTestKey32 and checks ciphertext || tag against the expected bytes, +// then decrypts in place and checks that the plaintext comes back. Any failure calls SymCryptFatal. +VOID +SYMCRYPT_CALL +SymCryptGcmSelftest(void) +{ + BYTE buf[ 3 + SYMCRYPT_AES_BLOCK_SIZE ]; + SYMCRYPT_GCM_EXPANDED_KEY key; + SYMCRYPT_ERROR err; + + if( SymCryptGcmExpandKey( &key, SymCryptAesBlockCipher, SymCryptTestKey32, 16 ) != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'gcm0' ); + } + + // buf[0..2] receives the ciphertext and buf[3..] the tag. + SymCryptGcmEncrypt( &key, + &SymCryptTestKey32[16], 12, + NULL, 0, + &SymCryptTestMsg3[0], buf, 3, + &buf[3], SYMCRYPT_AES_BLOCK_SIZE ); + + SymCryptInjectError( buf, sizeof( buf ) ); + if( memcmp( buf, SymCryptGcmSelftestResult, sizeof( buf ) ) != 0 ) + { + SymCryptFatal( 'gcm1' ); + } + + // inject error into the ciphertext or tag + SymCryptInjectError( buf, sizeof( buf ) ); + + err = SymCryptGcmDecrypt( &key, + &SymCryptTestKey32[16], 12, + NULL, 0, + buf, buf, 3, + &buf[3], SYMCRYPT_AES_BLOCK_SIZE ); + + SymCryptInjectError( buf, 3 ); + + if( err != SYMCRYPT_NO_ERROR || memcmp( buf, SymCryptTestMsg3, 3 ) != 0 ) + { + SymCryptFatal( 'gcm2' ); + } + +} diff --git a/libs/symcrypt/lib/gen_int.c b/libs/symcrypt/lib/gen_int.c new file mode 100644 index 00000000000..5f5983358a0 --- /dev/null +++ b/libs/symcrypt/lib/gen_int.c @@ -0,0 +1,368 @@ +// +// gen_int.c Generic integer algorithms (not tied to low-level implementations) +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+// + +#include "precomp.h" + + +// Returns the greatest common divisor of two 64-bit values using a binary GCD that always runs +// 127 iterations and selects between outcomes with bit masks rather than branches. +// The inputs must not both be even and flags must contain SYMCRYPT_FLAG_GCD_INPUTS_NOT_BOTH_EVEN +// (both asserted in CHKed builds); flags is otherwise unused. +UINT64 +SYMCRYPT_CALL +SymCryptUint64Gcd( UINT64 a, UINT64 b, UINT32 flags ) +{ + UINT64 swap; + UINT64 tmp; + UINT64 a2; + UINT64 b2; + UINT32 i; + +/* + Algorithm outline: + + if( b even ) + swap (a,b) + + loop: + { invariant: b is odd } + if( a even ) + a = a/2 + else + if a < b + swap (a,b) + a = (a - b) / 2 + + We ignore the data_public flag as we currently always use a side-channel safe implementation + + Computing (a < b) on 64-bit values is awkward in a side-channel safe way because C gives no + access to the carry flag; see the mask construction in the loop below. +*/ + SYMCRYPT_ASSERT( (flags & SYMCRYPT_FLAG_GCD_INPUTS_NOT_BOTH_EVEN) != 0 && ((a | b) & 1) != 0 ); + UNREFERENCED_PARAMETER( flags ); + + // First we make sure that b is odd + // If b even: swap (a,b) + // swap is all-ones when b is even and zero when b is odd, so the XOR swap is a no-op for odd b. + swap = ~(0 - (b & 1)); + tmp = (a ^ b) & swap; + a ^= tmp; + b ^= tmp; + + // Each loop iteration reduces len(a) + len(b) by at least 1, so looping 127 times is enough. + // For inputs (2^63, 2^63 + 1) we get 63 iterations to reduce a to 1, and then another 63 to get + // the other value to 1, plus one more to make it 0. + for( i=0; i < 127; i++ ) + { + // Compute the result of the 'else' part of the if( a even ) into (a2, b2) + // First we evaluate (a < b), which is a bit tricky without access to the carry flag. + // a < b = (b>>63) if ((a^b) >> 63) == 1 + // (a - b) >> 63 otherwise + tmp = a ^ b; + tmp = (tmp & b) | (~tmp & (a-b)); + swap = 0 - (tmp >> 63); + + // Now swap if a < b into (a2, b2) + tmp = (a ^ b) & swap; + a2 = a ^ tmp; + b2 = b ^ tmp; + + // After the conditional swap a2 >= b2, so the subtraction does not wrap + a2 = (a2 - b2) / 2; + + // Compute the (a is odd) condition + tmp = 0 - (a & 1); + + // Assemble the final result + // tmp is all-ones when a is odd (take the subtract-and-halve result) and zero when a is even + // (just halve a and keep b). + a = (tmp & a2) | (~tmp & a/2); + b = (tmp & b2) | (~tmp & b); + } + + // a has been reduced to 0, so b now holds the GCD. + SYMCRYPT_ASSERT( a == 0 ); + return b; +} + + +/* +Extended GCD notes. + +A side-channel safe implementation cannot effectively use Euclid's algorithm. +The quotient is typically very small, but it can be very large.
An SCS implementation +would require the quotient to always be treated as a full-sized number, which would kill performance. +Instead we use the binary algorithm which is easier to adapt to side-channel safety. + +Basic algorithm for inputs S1 and S2: + Eliminate the joint factors of two. These are added later to the result + For now we assume that both S1 and S2 are non-zero and S2 is odd. + +Invariant: + A = A1 * S1 (mod S2) + B = B1 * S1 (mod S2) + B is odd + +Initial values: + A = S1; A1 = 1; + B = S2; B1 = 0; + +Main loop: + + t = len(A) + len(B) - 1 // Careful of overflows, use a SIZE_T + + repeat t times: + 1. if A odd and A < B: + Swap (A, A1) with (B, B1) + 2. if A odd: + A -= B; + A1 -= B1 (mod S2); + 3. A /= 2; + A1 /= 2 (mod S2); + +Proof of the invariant: + It is easy to see that initially the invariant holds (S2 is odd). + + Assume the invariant holds at the start of the loop's iteration. + Step 1 of the main loop preserves the invariant since the first 2 + equations of the invariant are the same for A's and B's and + the swapping happens only if A is odd. Therefore, B is odd + after step 1. + Step 2 essentially subtracts the second equation of the invariant + from the first (modulo S2). This preserves the invariant since step + 1 ensured that A >= B (when A odd), so the operation A = A-B holds + modulo S2. + Step 3 essentially multiplies the first equation of the invariant + with the inverse of 2 modulo S2. Since S2 is odd we know that the + inverse exists. Also the operation A = A/2 is correct modulo S2 + because steps 1 and 2 ensured that A is even at this point. + (To see this, consider 2*a = x (mod S2) => a = x*2^{-1} (mod S2) + where a is an integer and 2^{-1} is the inverse of 2 modulo S2) + +Termination/Results: + Each iteration reduces len(A) + len(B) by at least one until A=0. + When A=0 the loop does nothing except churn by dividing A and A1 + by 2 every time. + After len(A)+len(B)-1 iterations, A must be zero. 
At that point + we have + + B = GCD + B1 * S1 = GCD (mod S2) + + The LCM is calculated as S1*S2 / GCD. + + InvS1ModS2 is defined as the smallest value X such that + X*S1 = GCD (mod S2), but B1 might not be the smallest solution. + Let P2 = S2/GCD. + Any two solutions X1, X2 to X*S1 = GCD (mod S2) have (X1-X2)*S1 mod S2 = 0, + so X1-X2 is a multiple of P2. Therefore we need to reduce B1 modulo P2 + to get the smallest solution for InvS1ModS2. + + ** Notice that if B1 is a multiple of S2 (or 0), which means that GCD is equal to S2, + then the above result is 0. In that case InvS1ModS2 is undefined. + + Similarly, InvS2ModS1 is defined as the smallest value Y such that + Y*S2 = GCD (mod S1). We have that for some integer q: + + q*S2 = B1*S1 - GCD => (-q mod S1) * S2 = GCD (mod S1) + + As above, if B1 is 0, then InvS2ModS1 is undefined. Therefore we ignore this case. + For the defined case, B1>=1 and S1>=GCD which implies that q >= 0. This + allows us to divide (B1*S1 - GCD) by S2. + Therefore InvS2ModS1 can be computed as -((B1*S1 - GCD)/S2) mod S1. + +For simplicity, our generic implementation works with all values the same size. +This can be less efficient if one input is much larger than the other, for +example for RSA key generation when one input is 1000+ bits and the other 17 bits. +However, that is not a high-performance path. If it is, a dedicated GCD with one +input a UINT32 or UINT64 would be the solution to a much faster extended GCD.
+*/ +VOID +SYMCRYPT_CALL +SymCryptIntExtendedGcd( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + UINT32 flags, + _Out_opt_ PSYMCRYPT_INT piGcd, + _Out_opt_ PSYMCRYPT_INT piLcm, + _Out_opt_ PSYMCRYPT_INT piInvSrc1ModSrc2, + _Out_opt_ PSYMCRYPT_INT piInvSrc2ModSrc1, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + UINT32 nDigits = SYMCRYPT_MAX( SymCryptIntDigitsizeOfObject( piSrc1 ), SymCryptIntDigitsizeOfObject( piSrc2 )); + PSYMCRYPT_INT piA; // size nDigits + PSYMCRYPT_INT piB; // size nDigits, NOT ALLOCATED (part of the pdGcd divisor) + PSYMCRYPT_INT piTmp; // size nDigits + PSYMCRYPT_INT piA1; // size nDigits + PSYMCRYPT_INT piB1; // size nDigits + PSYMCRYPT_INT piTmpDbl; // size 2*nDigits + PSYMCRYPT_DIVISOR pdGcd; // size nDigits + PSYMCRYPT_DIVISOR pdTmp; // size nDigits + UINT32 cbInt; + UINT32 cbWideInt; + UINT32 cbDivisor; + SIZE_T cbFnScratch; + UINT32 t; + UINT32 c; + UINT32 d; + + UNREFERENCED_PARAMETER( flags ); // Currently not used to improve performance. 
+ + // Compute how much scratch space we need for the functions we call + cbFnScratch = SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( 2 * nDigits, nDigits ); + cbFnScratch = SYMCRYPT_MAX( cbFnScratch, SYMCRYPT_SCRATCH_BYTES_FOR_INT_MUL( 2*nDigits ) ); + cbFnScratch = SYMCRYPT_MAX( cbFnScratch, SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_DIVISOR( nDigits ) ); + + cbInt = SymCryptSizeofIntFromDigits( nDigits ); + cbWideInt = SymCryptSizeofIntFromDigits( 2*nDigits ); + cbDivisor = SymCryptSizeofDivisorFromDigits( nDigits ); + + SYMCRYPT_ASSERT( cbWideInt != 0 ); + SYMCRYPT_ASSERT( cbScratch >= 4 * cbInt + + 1 * cbWideInt + + 2 * cbDivisor + + cbFnScratch ); + + piA = SymCryptIntCreate( pbScratch, cbInt, nDigits ); + pbScratch += cbInt; cbScratch -= cbInt; + // piB is stored inside the pdGcd object created later + piTmp = SymCryptIntCreate( pbScratch, cbInt, nDigits ); + pbScratch += cbInt; cbScratch -= cbInt; + piA1 = SymCryptIntCreate( pbScratch, cbInt, nDigits ); + pbScratch += cbInt; cbScratch -= cbInt; + piB1 = SymCryptIntCreate( pbScratch, cbInt, nDigits ); + pbScratch += cbInt; cbScratch -= cbInt; + + piTmpDbl = SymCryptIntCreate( pbScratch, cbWideInt, 2 * nDigits ); + pbScratch += cbWideInt; cbScratch -= cbWideInt; + + pdGcd = SymCryptDivisorCreate( pbScratch, cbDivisor, nDigits ); + pbScratch += cbDivisor; cbScratch -= cbDivisor; + piB = SymCryptIntFromDivisor( pdGcd ); + + pdTmp = SymCryptDivisorCreate( pbScratch, cbDivisor, nDigits ); + pbScratch += cbDivisor; cbScratch -= cbDivisor; + + SymCryptIntCopyMixedSize( piSrc1, piA ); // Ignore the error return value here as we know + SymCryptIntCopyMixedSize( piSrc2, piB ); // that the destination integers are large enough. 
+ + SymCryptIntSetValueUint32( 1, piA1 ); + SymCryptIntSetValueUint32( 0, piB1 ); + + // Currently not supported: Src1 to be 0 or Src2 to be even + SYMCRYPT_ASSERT( !SymCryptIntIsEqualUint32( piA, 0 ) ); + SYMCRYPT_ASSERT( (SymCryptIntGetValueLsbits32( piB ) & 1) != 0 ); + if ( SymCryptIntIsEqualUint32( piA, 0 ) || + ((SymCryptIntGetValueLsbits32( piB ) & 1) == 0) ) + { + goto cleanup; + } + + // Currently not supported: piInvSrc2ModSrc1 != NULL and max( Src1.nDigits, Src2.nDigits ) * 2 > SymCryptDigitsFromBits(SYMCRYPT_INT_MAX_BITS) + if( (piInvSrc2ModSrc1 != NULL) && (piTmpDbl == NULL) ) + { + goto cleanup; + } + + t = SymCryptIntBitsizeOfObject( piSrc1 ) + SymCryptIntBitsizeOfObject( piSrc2 ) - 1; + while( t > 0 ) + { + t--; + + //if A odd and A < B: + // Swap (A, A1) with (B, B1) + c = 1 & (SymCryptIntGetValueLsbits32( piA ) & SymCryptIntSubSameSize( piA, piB, piTmp ) ); + SymCryptIntConditionalSwap( piA, piB, c ); + SymCryptIntConditionalSwap( piA1, piB1, c ); + + //if A odd: + // A -= B; A1 -= B1 (mod S2); + c = 1 & SymCryptIntGetValueLsbits32( piA ); + SymCryptIntSubSameSize( piA, piB, piTmp ); // Never a carry due to the previous conditional swap + SymCryptIntConditionalCopy( piTmp, piA, c ); + + d = SymCryptIntSubSameSize( piA1, piB1, piTmp ); + SymCryptIntConditionalCopy( piTmp, piA1, c ); + SymCryptIntAddMixedSize( piA1, piSrc2, piTmp ); + SymCryptIntConditionalCopy( piTmp, piA1, c & d ); + + // A /= 2; A1 /= 2 (mod S2); + SYMCRYPT_ASSERT( (SymCryptIntGetValueLsbits32( piA ) & 1) == 0 ); + SymCryptIntShr1( 0, piA, piA ); + c = SymCryptIntGetValueLsbits32( piA1 ) & 1; + d = SymCryptIntAddMixedSize( piA1, piSrc2, piTmp ); + SymCryptIntConditionalCopy( piTmp, piA1, c ); + SymCryptIntShr1( c & d, piA1, piA1 ); + + } + + // B = GCD, B1 * S1 = GCD (mod S2) + // A = 0, A1 is scratch + // + // Algorithm from here: + // GCD as divisor + // LCM = S1 * S2 / GCD. 
+ // P2 = S2 / GCD, as divisor (only for InvS1ModS2) + // InvS1ModS2 = B1 mod P2 + // InvS2ModS1 = -((B1*S1 - GCD) div S2) mod S1 + + if( piGcd != NULL ) + { + SymCryptIntCopyMixedSize( piB, piGcd ); + } + + if( piLcm == NULL && piInvSrc1ModSrc2 == NULL && piInvSrc2ModSrc1 == NULL ) + { + // Only GCD needed; don't do the other work + goto cleanup; + } + + SymCryptIntCopyMixedSize( piB, SymCryptIntFromDivisor( pdGcd ) ); // copy into INT of the right size + + // IntToDivisor requirement: + // Gcd !=0 + SymCryptIntToDivisor( SymCryptIntFromDivisor( pdGcd ), pdGcd, 3, 0, pbScratch, cbScratch ); + + if( piLcm != NULL ) + { + // LCM = S1 * S2 / GCD + SymCryptIntMulMixedSize( piSrc1, piSrc2, piLcm, pbScratch, cbScratch ); + SymCryptIntDivMod( piLcm, pdGcd, piLcm, NULL, pbScratch, cbScratch ); + } + + if( piInvSrc1ModSrc2 != NULL ) + { + // Future optimization: if GCD == 1 then we can just copy B1. + SymCryptIntDivMod( piSrc2, pdGcd, SymCryptIntFromDivisor( pdTmp ), NULL, pbScratch, cbScratch ); + + // IntToDivisor requirement: + // Src2 / pdGcd > 0 + SymCryptIntToDivisor( SymCryptIntFromDivisor( pdTmp ), pdTmp, 1, 0, pbScratch, cbScratch ); + SymCryptIntDivMod( piB1, pdTmp, NULL, piInvSrc1ModSrc2, pbScratch, cbScratch ); + } + + if( piInvSrc2ModSrc1 != NULL ) + { + // InvS2ModS1 = - ( (B1*S1 - GCD)/S2 ) mod S1 + + // S2 as divisor + SymCryptIntCopyMixedSize( piSrc2, SymCryptIntFromDivisor( pdTmp ) ); + + // IntToDivisor requirement: + // Src2 is odd --> Src2 != 0 + SymCryptIntToDivisor( SymCryptIntFromDivisor( pdTmp ), pdTmp, 1, 0, pbScratch, cbScratch ); + + SymCryptIntMulMixedSize( piB1, piSrc1, piTmpDbl, pbScratch, cbScratch ); + SymCryptIntSubMixedSize( piTmpDbl, piB, piTmpDbl ); // Never a borrow if B1 >= 1 + SymCryptIntDivMod( piTmpDbl, pdTmp, piTmpDbl, NULL, pbScratch, cbScratch ); + + // and reduce modulo S1 + SymCryptIntCopyMixedSize( piSrc1, SymCryptIntFromDivisor( pdTmp ) ); + + // IntToDivisor requirement: + // Src1 > 0 + SymCryptIntToDivisor( 
SymCryptIntFromDivisor( pdTmp ), pdTmp, 1, 0, pbScratch, cbScratch ); + SymCryptIntDivMod( piTmpDbl, pdTmp, NULL, piInvSrc2ModSrc1, pbScratch, cbScratch ); + + // Negative modulo S1 + SymCryptIntSubMixedSize( SymCryptIntFromDivisor( pdTmp ), piInvSrc2ModSrc1, piInvSrc2ModSrc1 ); // Never a borrow as piInvSrc2ModSrc1 < S1 + } + +cleanup: + return; // Need a statement after a label... +} diff --git a/libs/symcrypt/lib/ghash.c b/libs/symcrypt/lib/ghash.c new file mode 100644 index 00000000000..32533a74dfb --- /dev/null +++ b/libs/symcrypt/lib/ghash.c @@ -0,0 +1,951 @@ +// +// GHASH.c +// +// Implementation of the NIST SP800-38D GHASH function which is the +// core authentication function for the GCM and GMAC modes. +// +// This implementation was done by Niels Ferguson for the RSA32.lib library in 2008, +// and adapted to the SymCrypt library in 2009. +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" +#include "ghash_definitions.h" + +////////////////////////////////////////////////////////////////////////////// +// Platform-independent code +// + +// +// GHashExpandKeyC +// Generic GHash key expansion routine, works on all platforms. +// This function computes a table of H, Hx, Hx^2, Hx^3, ..., Hx^127 +// +VOID +SYMCRYPT_CALL +SymCryptGHashExpandKeyC( + _Out_writes_( SYMCRYPT_GF128_FIELD_SIZE ) PSYMCRYPT_GF128_ELEMENT expandedKey, + _In_reads_( SYMCRYPT_GF128_BLOCK_SIZE ) PCBYTE pH ) +{ + UINT64 H0, H1, t; + UINT32 i; + + // + // (H1, H0) form a 128-bit integer, H1 is the upper part, H0 the lower part. + // Convert pH[] to (H1, H0) using MSByte first convention. 
+ // + H1 = SYMCRYPT_LOAD_MSBFIRST64( &pH[0] ); + H0 = SYMCRYPT_LOAD_MSBFIRST64( &pH[8] ); + + for( i=0; i<SYMCRYPT_GF128_FIELD_SIZE; i++ ) + { + expandedKey[i].ull[0] = H0; + expandedKey[i].ull[1] = H1; + // + // Multiply (H1,H0) by x in the GF(2^128) field using the field encoding from SP800-38D + // + t = UINT64_NEG(H0 & 1) & ((UINT64)GF128_FIELD_R_BYTE << (8 * ( sizeof( UINT64 ) - 1 )) ) ; + H0 = (H0 >> 1) | (H1 << 63); + H1 = (H1 >> 1) ^ t; + } +} + + +// +// GHashAppendDataC +// Generic GHash routine, works on all platforms. +// +VOID +SYMCRYPT_CALL +SymCryptGHashAppendDataC( + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + UINT64 R0, R1; + UINT64 mask; + SYMCRYPT_ALIGN UINT32 state32[4]; + UINT32 t; + int i,j; + while( cbData >= SYMCRYPT_GF128_BLOCK_SIZE ) + { + R0 = R1 = 0; + + // + // We have two nested loops so that we can do most of our operations + // on 32-bit words. 64-bit rotates/shifts can be really slow on a 32-bit CPU. + // On AMD64 we use the XMM version which is much faster. 
+ // + state32[0] = (UINT32)pState->ull[0]; + state32[1] = (UINT32)(pState->ull[0] >> 32); + state32[2] = (UINT32)pState->ull[1]; + state32[3] = (UINT32)(pState->ull[1] >> 32); + for( i=0; i<4; i++ ) + { + t = SYMCRYPT_LOAD_MSBFIRST32( &pbData[4*i] ) ^ state32[3-i]; + for( j=31; j>=0; j-- ) + { + mask = (UINT64)( -(INT64)(t & 1 )); + R0 ^= expandedKeyTable[32*i+j].ull[0] & mask; + R1 ^= expandedKeyTable[32*i+j].ull[1] & mask; + t >>= 1; + } + } + pState->ull[0] = R0; + pState->ull[1] = R1; + pbData += SYMCRYPT_GF128_BLOCK_SIZE; + cbData -= SYMCRYPT_GF128_BLOCK_SIZE; + } + + SymCryptWipeKnownSize( state32, sizeof( state32 ) ); +} + + +VOID +SYMCRYPT_CALL +SymCryptGHashResult( + _In_ PCSYMCRYPT_GF128_ELEMENT pState, + _Out_writes_( SYMCRYPT_GF128_BLOCK_SIZE ) PBYTE pbResult ) +{ + SYMCRYPT_STORE_MSBFIRST64( pbResult , pState->ull[1] ); + SYMCRYPT_STORE_MSBFIRST64( pbResult + 8, pState->ull[0] ); +} + +//////////////////////////////////////////////////////////////////////////////////////////// +// XMM code +// + +VOID +SYMCRYPT_CALL +SymCryptGHashExpandKeyXmm( + _Out_writes_( SYMCRYPT_GF128_FIELD_SIZE ) PSYMCRYPT_GF128_ELEMENT expandedKey, + _In_reads_( SYMCRYPT_GF128_BLOCK_SIZE ) PCBYTE pH ) +{ + // + // We use the same layout for XMM code as we did for C code, so we can use the same key + // expansion code. + // Improvement: we can add an expansion routine that uses the XMM registers for speed. 
+ // + + SymCryptGHashExpandKeyC( expandedKey, pH ); +} + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("sse2"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("sse2") +#endif + +// +// The XMM-based GHash append data function, only on AMD64 & X86 +// +VOID +SYMCRYPT_CALL +SymCryptGHashAppendDataXmm( + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + __m128i R; + __m128i cmpValue; + __m128i mask; + __m128i T; + __m128i tmp; + + PCSYMCRYPT_GF128_ELEMENT p; + PCSYMCRYPT_GF128_ELEMENT pLimit; + UINT32 t; + int i; + + cmpValue = _mm_setzero_si128(); // cmpValue = 0 + + while( cbData >= SYMCRYPT_GF128_BLOCK_SIZE ) + { + R = _mm_setzero_si128(); + + // + // The amd64 compiler can't optimize array indices in a loop where + // you use _mm intrinsics, + // so we do all the pointer arithmetic for the compiler. + // + p = &expandedKeyTable[0]; + pLimit = &expandedKeyTable[32]; + + for( i=0; i<4; i++ ) + { + // + // Set up our XMM register with 4 identical 32-bit integers so that + // we can generate the mask from the individual bits of the 32-bit value. + // Note the use of tmp; if we assign directly to the fields of T the + // compiler no longer caches T in an XMM register, which is bad. + // + // There are XMM instructions where we can do the duplication in the XMM + // registers, but they require SSE3 support, and this code only requires + // SSE2. As the inner loop consumes most of the time, it isn't worth + // using the SSE3 instructions. + // + // Note that accessing the state as an array of UINT32s depends on the + // endianness of the CPU, but this is XMM code that only runs on + // little endian machines. 
+ // + t = SYMCRYPT_LOAD_MSBFIRST32( &pbData[4*i] ) ^ pState->ul[3-i]; + tmp = _mm_set_epi32(t, t, t, t); + + T = tmp; + while( p < pLimit ) + { + // + // p and plimit are always at indexes that are multiples of 4 from + // the start of the array. + // We need to explain to prefast that this means that p <= pLimit - 4 + // + SYMCRYPT_ASSERT( p <= pLimit - 4 ); + + mask = _mm_cmpgt_epi32( cmpValue, T ); + T = _mm_add_epi32( T, T ); + mask = _mm_and_si128( mask, p[0].m128i ); + R = _mm_xor_si128( R, mask ); + + mask = _mm_cmpgt_epi32( cmpValue, T ); + T = _mm_add_epi32( T, T ); + mask = _mm_and_si128( mask, p[1].m128i ); + R = _mm_xor_si128( R, mask ); + + mask = _mm_cmpgt_epi32( cmpValue, T ); + T = _mm_add_epi32( T, T ); + mask = _mm_and_si128( mask, p[2].m128i ); + R = _mm_xor_si128( R, mask ); + + mask = _mm_cmpgt_epi32( cmpValue, T ); + T = _mm_add_epi32( T, T ); + mask = _mm_and_si128( mask, p[3].m128i ); + R = _mm_xor_si128( R, mask ); + + p += 4; + } + pLimit += 32; + } + + pState->m128i = R; + pbData += SYMCRYPT_GF128_BLOCK_SIZE; + cbData -= SYMCRYPT_GF128_BLOCK_SIZE; + } +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif + +#if SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 +// +// The NEON-based GHash append data function, only on ARM & ARM64 +// +VOID +SYMCRYPT_CALL +SymCryptGHashAppendDataNeon( + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + // Room for improvement: replace non-crypto NEON code below, based on a bit by bit lookup with + // pmull on 8b elements - 8x(8bx8b) -> 8x(16b) pmull is NEON instruction since Armv7 + // + // When properly unrolled: + // 1 (64bx64b -> 128b) pmull instruction and 1 eor instruction can be replaced by + // 8 (8x(8bx8b) -> 8x(16b)) pmull instructions and 8 eor instructions + // so each 128b of data could be processed by less than 64 
instructions (using karatsuba) + // rather than ~512 instructions (bit by bit) + // + // Not a priority, expect that AES-GCM performance will be dominated by AES on these platforms + + __n128 R; + __n128 cmpValue; + __n128 mask; + __n128 T; + + PCSYMCRYPT_GF128_ELEMENT p; + PCSYMCRYPT_GF128_ELEMENT pLimit; + UINT32 t; + int i; + + cmpValue = vdupq_n_u32(0); // cmpValue = 0 + + while( cbData >= SYMCRYPT_GF128_BLOCK_SIZE ) + { + R = cmpValue; + + // + // Do all the pointer arithmetic for the compiler. + // + p = &expandedKeyTable[0]; + pLimit = &expandedKeyTable[32]; + + for( i=0; i<4; i++ ) + { + // + // Set up our NEON register with 4 identical 32-bit integers so that + // we can generate the mask from the individual bits of the 32-bit value. + // vdupq_n_u32 broadcasts t into all four lanes directly, so unlike the + // XMM version no intermediate tmp is needed. + // + // Note that accessing the state as an array of UINT32s depends on the + // endianness of the CPU, but Arm code is always expected to execute in + // little endian mode. + // + t = SYMCRYPT_LOAD_MSBFIRST32( &pbData[4*i] ) ^ pState->ul[3-i]; + T = vdupq_n_u32( t ); + + while( p < pLimit ) + { + // + // p and plimit are always at indexes that are multiples of 4 from + // the start of the array.
+ // We need to explain to prefast that this means that p <= pLimit - 4 + // + SYMCRYPT_ASSERT( p <= pLimit - 4 ); + + mask = vcgtq_s32( cmpValue, T ); + T = vaddq_u32( T, T ); + mask = vandq_u32( mask, p[0].n128 ); + R = veorq_u32( R, mask ); + + mask = vcgtq_s32( cmpValue, T ); + T = vaddq_u32( T, T ); + mask = vandq_u32( mask, p[1].n128 ); + R = veorq_u32( R, mask ); + + mask = vcgtq_s32( cmpValue, T ); + T = vaddq_u32( T, T ); + mask = vandq_u32( mask, p[2].n128 ); + R = veorq_u32( R, mask ); + + mask = vcgtq_s32( cmpValue, T ); + T = vaddq_u32( T, T ); + mask = vandq_u32( mask, p[3].n128 ); + R = veorq_u32( R, mask ); + + p += 4; + } + pLimit += 32; + } + + pState->n128 = R; + pbData += SYMCRYPT_GF128_BLOCK_SIZE; + cbData -= SYMCRYPT_GF128_BLOCK_SIZE; + } +} +#endif + + +////////////////////////////////////////////////////////////////////////////////////// +// Pclmulqdq implementation +// + +/* +GHASH GF(2^128) multiplication using PCLMULQDQ + +The GF(2^128) field used in GHASH is GF(2)[x]/p(x) where p(x) is the primitive polynomial + x^128 + x^7 + x^2 + x + 1 + +Notation: We use the standard mathematical notation '+' for the addition in the field, +which corresponds to a xor of the bits. + +Multiplication: +Given two field elements A and B (represented as 128-bit values), +we first compute the polynomial product + (C,D) := A * B +where C and D are also 128-bit values. + +The PCLMULQDQ instruction performs a 64 x 64 -> 128 bit carryless multiplication. +To multiply 128-bit values we write A = (A1, A0) and B = (B1, B0) in two 64-bit halves. + +The schoolbook multiplication is computed by + (C, D) = (A1 * B1)x^128 + (A1 * B0 + A0 * B1)x^64 + (A0 * B0) +This require four PCLMULQDQ instructions. The middle 128-bit result has to be shifted +left and right, and each half added to the upper and lower 128-bit result to get (C,D). 
+ +Alternatively, the middle 128-bit intermediate result can be computed using Karatsuba: + (A1*B0 + A0*B1) = (A1 + A0) * (B1 + B0) + (A1*B1) + (A0*B0) +This requires only one PCLMULQDQ instruction to multiply (A1 + A0) by (B1 + B0) +as the other two products are already computed. +Whether this is faster depends on the relative speed of shift/xor versus PCLMULQDQ. + +Both multiplication algorithms produce three 128-bit intermediate results (R1, Rmid, R0), +with the full result defined by R1 x^128 + Rmid x^64 + R0. +If we do Multiply-Accumulate then we can accumulate the three 128-bit intermediate results +directly. As there are no carries, there is no overflow, and the combining of the three +intermediate results into a 256-bit result can be shared amongst all multiplications. + + +Modulo reduction: +We use << and >> to denote shifts on 128-bit values. +The modulo reduction can now be done as follows: +given a 256-bit value (C,D) representing C x^128 + D we compute + (T1,T0) := C + C*x + C * x^2 + C * x^7 + R := D + T0 + T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) + +(T1,T0) is just the value C x^128 reduced one step modulo p(x). The value T1 is at most 7 bits, +so in the next step the reduction, which computes the result R, is easy. The +expression T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) is just T1 * x^128 reduced modulo p(x). + +Let's first get rid of the polynomial arithmetic and write this completely using shifts on +128-bit values.
+ +T0 := C + (C << 1) + (C << 2) + (C << 7) +T1 := (C >> 127) + (C >> 126) + (C >> 121) +R := D + T0 + T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) + +We can optimize this by rewriting the equations + +T2 := T1 + C + = C + (C>>127) + (C>>126) + (C>>121) +R = D + T0 + T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) + = D + C + (C << 1) + (C << 2) + (C << 7) + T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) + = D + T2 + (T2 << 1) + (T2 << 2) + (T2 << 7) + +Thus +T2 = C + (C>>127) + (C>>126) + (C>>121) +R = D + T2 + (T2 << 1) + (T2 << 2) + (T2 << 7) + +Gets the right result and uses only 6 shifts. + +The SSE instruction set does not implement bit-shifts of 128-bit values. Instead, we will +use bit-shifts of the 32-bit subvalues, and byte shifts (shifts by a multiple of 8 bits) +on the full 128-bit values. +We use the <<<< and >>>> operators to denote shifts on 32-bit subwords. + +We can now do the modulo reduction by + +t1 := (C >> 127) = (C >>>> 31) >> 96 +t2 := (C >> 126) = (C >>>> 30) >> 96 +t3 := (C >> 121) = (C >>>> 25) >> 96 +T2 = C + t1 + t2 + t3 + +left-shifts in the computation of R are a bit more involved as we have to move bits from +one subword to the next + +u1 := (T2 << 1) = (T2 <<<< 1) + ((T2 >>>> 31) << 32) +u2 := (T2 << 2) = (T2 <<<< 2) + ((T2 >>>> 30) << 32) +u3 := (T2 << 7) = (T2 <<<< 7) + ((T2 >>>> 25) << 32) +R = D + T2 + u1 + u2 + u3 + +We can eliminate some common subexpressions. For any k we have +(T2 >>>> k) = ((C + r) >>>> k) +where r is a 7-bit value. If k>7 then this is equal to (C >>>> k). This means that +the value (T2 >>>> 31) is equal to (C >>>> 31) so we don't have to compute it again. 
+ +So we can rewrite our formulas as +t4 := (C >>>> 31) +t5 := (C >>>> 30) +t6 := (C >>>> 25) +ts = t4 + t5 + t6 +T2 = C + (ts >> 96) + +Note that ts = (C >>>> 31) + (C >>>> 30) + (C >>>> 25) +which is equal to (T2 >>>> 31) + (T2 >>>> 30) + (T2 >>>> 25) + +R = D + T2 + u1 + u2 + u3 + = D + T2 + (T2 <<<< 1) + (T2 <<<< 2) + (T2 <<<< 7) + (ts << 32) + +All together, we can do the modulo reduction using the following formulas + +ts := (C >>>> 31) + (C >>>> 30) + (C >>>> 25) +T2 := C + (ts >> 96) +R = D + T2 + (T2 <<<< 1) + (T2 <<<< 2) + (T2 <<<< 7) + (ts << 32) + +Using a total of 16 operations. (6 subword shifts, 2 byte shifts, and 8 additions) + +Reversed bit order: +There is one more complication. GHASH uses the bits in the reverse order from normal representation. +The bits b_0, b_1, ..., b_127 represent the polynomial b_0 + b_1 * x + ... + b_127 * x^127. +This means that the most significant bit in each byte is actually the least significant bit in the +polynomial. + +SSE CPUs use the LSBFirst convention. This means that the bits b_0, b_1, ..., b_127 of the polynomial +end up at positions 7, 6, 5, ..., 1, 0, 15, 14, ..., 9, 8, 23, 22, ... of our XMM register. +This is obviously not a useful representation to do arithmetic in. +The first step is to BSWAP the value so that the bits appear in pure reverse order. +That is at least algebraically useful. + +To compute the multiplication we use the fact that GF(2)[x] multiplication has no carries and +thus no preference for bit order. After the BSWAP we don't have the values A and B, but rather +rev(A) and rev(B) where rev() is a function that reverses the bit order. We can now compute + + rev(A) * rev(B) = rev( A*B ) >> 1 + +where the shift operator is on the 256-bit product. + +The modulo reduction remains the same, except that we change all the shifts to be the other direction. + +This gives us finally the outline of our multiplication: + +- Apply BSWAP to all values loaded from memory. 
+ A := BSWAP( Abytes ) + B := BSWAP( Bbytes ) +- Compute the 256-bit product, possibly using Karatsuba. + (P1, P0) := A * B // 128x128 carryless multiplication +- Shift the result left one bit. + (Q1, Q0) := (P1, P0) << 1 + which is computed as + Q0 = (P0 <<<< 1) + (P0 >>>> 31) << 32 + Q1 = (P1 <<<< 1) + (P1 >>>> 31) << 32 + (P0 >>>> 31) >> 96 +- Perform the modulo reduction, with reversed bit order + ts := (Q0 <<<< 31) + (Q0 <<<< 30) + (Q0 <<<< 25) + T2 := Q0 + (ts << 96) + R = Q1 + T2 + (T2 >>>> 1) + (T2 >>>> 2) + (T2 >>>> 7) + (ts >> 32) + +Future work: +It might be possible to construct a faster solution by merging the leftshift of (P1,P0) +with the modulo reduction. + +*/ + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("ssse3,pclmul"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("ssse3,pclmul") +#endif + +VOID +SYMCRYPT_CALL +SymCryptGHashExpandKeyPclmulqdq( + _Out_writes_( SYMCRYPT_GF128_FIELD_SIZE ) PSYMCRYPT_GF128_ELEMENT expandedKey, + _In_reads_( SYMCRYPT_GF128_BLOCK_SIZE ) PCBYTE pH ) +{ + int i; + __m128i H, Hx, H2, H2x; + __m128i t0, t1, t2, t3, t4, t5; + __m128i Hi_even, Hix_even, Hi_odd, Hix_odd; + __m128i BYTE_REVERSE_ORDER = _mm_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ); + __m128i vMultiplicationConstant = _mm_set_epi32( 0, 0, 0xc2000000, 0 ); + + // + // Our expanded key consists of a list of N=SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS + // powers of H. The first entry is H^N, the next H^(N-1), then H^(N-2), ... + // + // For each power we store two 128-bit values. The first is H^i (Hi) and the second + // contains the two halves of H^i xorred with each other in the lower 64 bits (Hix). + // + // We keep all of the Hi entries together in the first half of the expanded key + // table, and all of the Hix entries together in the second half of the table. 
+ // + // This ordering allow for efficient vectorization with arbitrary vector width, as + // many multiplication constants can be loaded into wider vectors with the correct + // alignment. Not maintaining different layouts for different vector lengths does + // leave a small amount of performance on the table, but experimentally it seems to + // <1% difference, and using a single layout reduces complexity significantly. + // + C_ASSERT( 2*SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS <= SYMCRYPT_GF128_FIELD_SIZE ); + + H = _mm_loadu_si128((__m128i *) pH ); + H = _mm_shuffle_epi8( H, BYTE_REVERSE_ORDER ); + Hx = _mm_xor_si128( H, _mm_srli_si128( H, 8 ) ); + + _mm_store_si128( &GHASH_H_POWER(expandedKey, 1), H ); + _mm_store_si128( &GHASH_Hx_POWER(expandedKey, 1), Hx ); + + CLMUL_X_3( H, Hx, H, Hx, t0, t1, t2 ); + CLMUL_3_POST( t0, t1, t2 ); + MODREDUCE( vMultiplicationConstant, t0, t1, t2, H2 ); + H2x = _mm_xor_si128( H2, _mm_srli_si128( H2, 8 ) ); + _mm_store_si128( &GHASH_H_POWER(expandedKey, 2), H2 ); + _mm_store_si128( &GHASH_Hx_POWER(expandedKey, 2), H2x ); + + Hi_even = H2; + Hix_even = H2x; + + for( i=2; i<SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS; i+=2 ) + { + CLMUL_X_3( H, Hx, Hi_even, Hix_even, t0, t1, t2 ); + CLMUL_3_POST( t0, t1, t2 ); + CLMUL_X_3( H2, H2x, Hi_even, Hix_even, t3, t4, t5 ); + CLMUL_3_POST( t3, t4, t5 ); + MODREDUCE( vMultiplicationConstant, t0, t1, t2, Hi_odd ); + MODREDUCE( vMultiplicationConstant, t3, t4, t5, Hi_even ); + Hix_odd = _mm_xor_si128( Hi_odd, _mm_srli_si128( Hi_odd, 8 ) ); + Hix_even = _mm_xor_si128( Hi_even, _mm_srli_si128( Hi_even, 8 ) ); + + _mm_store_si128( &GHASH_H_POWER(expandedKey, i + 1), Hi_odd ); + _mm_store_si128( &GHASH_H_POWER(expandedKey, i + 2), Hi_even ); + _mm_store_si128( &GHASH_Hx_POWER(expandedKey, i + 1), Hix_odd ); + _mm_store_si128( &GHASH_Hx_POWER(expandedKey, i + 2), Hix_even ); + } +} + + + +VOID +SYMCRYPT_CALL +SymCryptGHashAppendDataPclmulqdq( + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT 
expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + __m128i state; + __m128i data; + __m128i a0, a1, a2; + __m128i Hi, Hix; + SIZE_T i; + SIZE_T nBlocks = cbData / SYMCRYPT_GF128_BLOCK_SIZE; + SIZE_T todo; + + // + // To do a BSWAP we need an __m128i value with the bytes + // + + __m128i BYTE_REVERSE_ORDER = _mm_set_epi8( + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ); + __m128i vMultiplicationConstant = _mm_set_epi32( 0, 0, 0xc2000000, 0 ); + + state = _mm_loadu_si128( (__m128i *) pState ); + + while( nBlocks > 0 ) + { + // + // We process the data in blocks of up to SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS blocks + // + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS ); + + // + // The first block is xorred with the state before multiplying it with a power of H + // + data = _mm_loadu_si128( (__m128i *) pbData ); + data = _mm_shuffle_epi8( data, BYTE_REVERSE_ORDER ); + pbData += SYMCRYPT_GF128_BLOCK_SIZE; + + state = _mm_xor_si128( state, data ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + + // + // Then we just do an improduct + // + for( i=1; i<todo; i++ ) + { + data = _mm_loadu_si128( (__m128i *) pbData ); + data = _mm_shuffle_epi8( data, BYTE_REVERSE_ORDER ); + pbData += SYMCRYPT_GF128_BLOCK_SIZE; + + Hi = _mm_load_si128( &GHASH_H_POWER(expandedKeyTable, todo - i) ); + Hix = _mm_load_si128( &GHASH_Hx_POWER(expandedKeyTable, todo - i) ); + CLMUL_ACC_3( data, Hi, Hix, a0, a1, a2 ); + } + + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + nBlocks -= todo; + } + + _mm_storeu_si128((__m128i *)pState, state ); +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // CPU_X86 || CPU_AMD64 + +#if SYMCRYPT_CPU_ARM64 + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("aes"))), apply_to=function) +#else 
+#pragma GCC push_options +#pragma GCC target("aes") +#endif + +VOID +SYMCRYPT_CALL +SymCryptGHashExpandKeyPmull( + _Out_writes_( SYMCRYPT_GF128_FIELD_SIZE ) PSYMCRYPT_GF128_ELEMENT expandedKey, + _In_reads_( SYMCRYPT_GF128_BLOCK_SIZE ) PCBYTE pH ) +{ + int i; + __n128 H, Hx, H2, H2x; + __n128 t0, t1, t2, t3, t4, t5; + __n128 Hi_even, Hix_even, Hi_odd, Hix_odd; + const __n64 vMultiplicationConstant = SYMCRYPT_SET_N64_U64(0xc200000000000000); + // + // Our expanded key consists of a list of N=SYMCRYPT_GHASH_PMULL_HPOWERS + // powers of H. The first entry is H^N, the next H^(N-1), then H^(N-2), ... + // + // For each power we store two 128-bit values. The first is H^i (Hi) and the second + // contains the two halves of H^i xorred with each other in the lower 64 bits (Hix). + // + // We keep all of the Hi entries together in the first half of the expanded key + // table, and all of the Hix entries together in the second half of the table. + // + // This ordering allows for efficient vectorization with arbitrary vector width, as + // many multiplication constants can be loaded into wider vectors with the correct + // alignment. Not maintaining different layouts for different vector lengths does + // leave a small amount of performance on the table, but experimentally it seems to be + // <1% difference, and using a single layout reduces complexity significantly.
+ // + C_ASSERT( 2*SYMCRYPT_GHASH_PMULL_HPOWERS <= SYMCRYPT_GF128_FIELD_SIZE ); + + H = *(__n128 *) pH; + Hx = vrev64q_u8( H ); + H = vextq_u8( Hx, Hx, 8 ); + Hx = veorq_u8( H, Hx ); + + GHASH_H_POWER(expandedKey, 1) = H; + GHASH_Hx_POWER(expandedKey, 1) = Hx; + + CLMUL_X_3( H, Hx, H, Hx, t0, t1, t2 ); + CLMUL_3_POST( t0, t1, t2 ); + MODREDUCE( vMultiplicationConstant, t0, t1, t2, H2 ); + H2x = veorq_u8( H2, vextq_u8( H2, H2, 8 ) ); + GHASH_H_POWER(expandedKey, 2) = H2; + GHASH_Hx_POWER(expandedKey, 2) = H2x; + + Hi_even = H2; + Hix_even = H2x; + + for( i=2; i<SYMCRYPT_GHASH_PMULL_HPOWERS; i+=2 ) + { + CLMUL_X_3( H, Hx, Hi_even, Hix_even, t0, t1, t2 ); + CLMUL_3_POST( t0, t1, t2 ); + CLMUL_X_3( H2, H2x, Hi_even, Hix_even, t3, t4, t5 ); + CLMUL_3_POST( t3, t4, t5 ); + MODREDUCE( vMultiplicationConstant, t0, t1, t2, Hi_odd ); + MODREDUCE( vMultiplicationConstant, t3, t4, t5, Hi_even ); + Hix_odd = veorq_u8( Hi_odd, vextq_u8( Hi_odd, Hi_odd, 8 ) ); + Hix_even = veorq_u8( Hi_even, vextq_u8( Hi_even, Hi_even, 8 ) ); + + GHASH_H_POWER(expandedKey, i + 1) = Hi_odd; + GHASH_H_POWER(expandedKey, i + 2) = Hi_even; + GHASH_Hx_POWER(expandedKey, i + 1) = Hix_odd; + GHASH_Hx_POWER(expandedKey, i + 2) = Hix_even; + } +} + +VOID +SYMCRYPT_CALL +SymCryptGHashAppendDataPmull( + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + __n128 state; + __n128 data, datax; + __n128 a0, a1, a2; + __n128 Hi, Hix; + const __n64 vMultiplicationConstant = SYMCRYPT_SET_N64_U64(0xc200000000000000); + SIZE_T i; + SIZE_T nBlocks = cbData / SYMCRYPT_GF128_BLOCK_SIZE; + SIZE_T todo; + + state = *(__n128 *) pState; + + while( nBlocks > 0 ) + { + // + // We process the data in blocks of up to SYMCRYPT_GHASH_PMULL_HPOWERS blocks + // + todo = SYMCRYPT_MIN( nBlocks, SYMCRYPT_GHASH_PMULL_HPOWERS ); + + // + // The first block is xorred with the state before multiplying it 
with a power of H + // + data = *(__n128 *)pbData; + REVERSE_BYTES( data, data ); + pbData += SYMCRYPT_GF128_BLOCK_SIZE; + + state = veorq_u8( state, data ); + CLMUL_3( state, GHASH_H_POWER(expandedKeyTable, todo), GHASH_Hx_POWER(expandedKeyTable, todo), a0, a1, a2 ); + + // + // Then we just do an improduct + // + for( i=1; i<todo; i++ ) + { + // we can avoid an EXT here by precomputing datax for CLMUL_ACCX_3 + datax = vrev64q_u8( *(__n128 *)pbData ); + data = vextq_u8( datax, datax, 8 ); + datax = veorq_u8( data, datax ); + pbData += SYMCRYPT_GF128_BLOCK_SIZE; + + Hi = GHASH_H_POWER(expandedKeyTable, todo - i); + Hix = GHASH_Hx_POWER(expandedKeyTable, todo - i); + CLMUL_ACCX_3( data, datax, Hi, Hix, a0, a1, a2 ); + } + + CLMUL_3_POST( a0, a1, a2 ); + MODREDUCE( vMultiplicationConstant, a0, a1, a2, state ); + nBlocks -= todo; + } + + *(__n128 *) pState = state; +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // CPU_ARM64 + + + +////////////////////////////////////////////////////////////// +// Stuff around the core algorithm implementation functions +// + + +VOID +SYMCRYPT_CALL +SymCryptGHashExpandKey( + _Out_ PSYMCRYPT_GHASH_EXPANDED_KEY expandedKey, + _In_reads_( SYMCRYPT_GF128_BLOCK_SIZE ) PCBYTE pH ) +{ +#if SYMCRYPT_CPU_X86 + PSYMCRYPT_GF128_ELEMENT pExpandedKeyTable; + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + // + // Initialize offset into table space for 16-alignment. + // + expandedKey->tableOffset = (0 -((UINT_PTR) &expandedKey->tableSpace[0])) % sizeof(SYMCRYPT_GF128_ELEMENT); + + pExpandedKeyTable = (PSYMCRYPT_GF128_ELEMENT)&expandedKey->tableSpace[expandedKey->tableOffset]; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_PCLMULQDQ_CODE ) ) + { + // + // We can only use the PCLMULQDQ data representation if the SaveXmm never fails. + // This is one of the CPU features required. + // We check anyway... 
+ // + if( SymCryptSaveXmm( &SaveData ) != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'pclm' ); + } + SymCryptGHashExpandKeyPclmulqdq( pExpandedKeyTable, pH ); + SymCryptRestoreXmm( &SaveData ); + } else if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSE2 ) && SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptGHashExpandKeyXmm( pExpandedKeyTable, pH ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptGHashExpandKeyC( pExpandedKeyTable, pH ); + } + +#elif SYMCRYPT_CPU_AMD64 + PSYMCRYPT_GF128_ELEMENT pExpandedKeyTable; + pExpandedKeyTable = &expandedKey->table[0]; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_PCLMULQDQ_CODE ) ) + { + SymCryptGHashExpandKeyPclmulqdq( pExpandedKeyTable, pH ); + } else if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSE2 ) ) + { + SymCryptGHashExpandKeyXmm( pExpandedKeyTable, pH ); + } else { + SymCryptGHashExpandKeyC( pExpandedKeyTable, pH ); + } + +#elif SYMCRYPT_CPU_ARM64 + PSYMCRYPT_GF128_ELEMENT pExpandedKeyTable; + pExpandedKeyTable = &expandedKey->table[0]; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_PMULL ) ) + { + SymCryptGHashExpandKeyPmull( pExpandedKeyTable, pH ); + } else { + SymCryptGHashExpandKeyC( pExpandedKeyTable, pH ); + } + +#else + SymCryptGHashExpandKeyC( &expandedKey->table[0], pH ); // Default expansion (does not need alignment) +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptGHashAppendData( + _In_ PCSYMCRYPT_GHASH_EXPANDED_KEY expandedKey, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ +#if SYMCRYPT_CPU_X86 + PCSYMCRYPT_GF128_ELEMENT pExpandedKeyTable; + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + pExpandedKeyTable = (PSYMCRYPT_GF128_ELEMENT)&expandedKey->tableSpace[expandedKey->tableOffset]; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_PCLMULQDQ_CODE ) ) + { + if( SymCryptSaveXmm( &SaveData ) != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'pclm' ); + } + 
SymCryptGHashAppendDataPclmulqdq( pExpandedKeyTable, pState, pbData, cbData ); + SymCryptRestoreXmm( &SaveData ); + } else if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSE2 ) && SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptGHashAppendDataXmm( pExpandedKeyTable, pState, pbData, cbData ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptGHashAppendDataC( pExpandedKeyTable, pState, pbData, cbData ); + } + +#elif SYMCRYPT_CPU_AMD64 + PCSYMCRYPT_GF128_ELEMENT pExpandedKeyTable; + + pExpandedKeyTable = &expandedKey->table[0]; + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_PCLMULQDQ_CODE ) ) + { + SymCryptGHashAppendDataPclmulqdq( pExpandedKeyTable, pState, pbData, cbData ); + } else if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSE2 ) ) + { + SymCryptGHashAppendDataXmm( pExpandedKeyTable, pState, pbData, cbData ); + } else { + SymCryptGHashAppendDataC( pExpandedKeyTable, pState, pbData, cbData ); + } +#elif SYMCRYPT_CPU_ARM + PCSYMCRYPT_GF128_ELEMENT pExpandedKeyTable; + + pExpandedKeyTable = &expandedKey->table[0]; + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON ) ) + { + SymCryptGHashAppendDataNeon( pExpandedKeyTable, pState, pbData, cbData ); + } else { + SymCryptGHashAppendDataC( pExpandedKeyTable, pState, pbData, cbData ); + } +#elif SYMCRYPT_CPU_ARM64 + PCSYMCRYPT_GF128_ELEMENT pExpandedKeyTable; + + pExpandedKeyTable = &expandedKey->table[0]; + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_PMULL ) ) + { + SymCryptGHashAppendDataPmull( pExpandedKeyTable, pState, pbData, cbData ); + } else if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON ) ) + { + SymCryptGHashAppendDataNeon( pExpandedKeyTable, pState, pbData, cbData ); + } else { + SymCryptGHashAppendDataC( pExpandedKeyTable, pState, pbData, cbData ); + } +#else + SymCryptGHashAppendDataC( &expandedKey->table[0], pState, pbData, cbData ); +#endif +} diff --git a/libs/symcrypt/lib/ghash_definitions.h 
b/libs/symcrypt/lib/ghash_definitions.h new file mode 100644 index 00000000000..351ad82f9d2 --- /dev/null +++ b/libs/symcrypt/lib/ghash_definitions.h @@ -0,0 +1,472 @@ +// +// ghash_definitions.h +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +////////////////////////////////////////////////////////////////////////////// +// Constants & globals +// + +#define GF128_FIELD_R_BYTE (0xe1) +#define UINT64_NEG(x) ((UINT64)-(INT64)(x)) + + + +////////////////////////////////////////////////////////////////////////////////////// +// Pclmulqdq implementation +// + +/* +GHASH GF(2^128) multiplication using PCLMULQDQ + +The GF(2^128) field used in GHASH is GF(2)[x]/p(x) where p(x) is the primitive polynomial + x^128 + x^7 + x^2 + x + 1 + +Notation: We use the standard mathematical notation '+' for the addition in the field, +which corresponds to a xor of the bits. + +Multiplication: +Given two field elements A and B (represented as 128-bit values), +we first compute the polynomial product + (C,D) := A * B +where C and D are also 128-bit values. + +The PCLMULQDQ instruction performs a 64 x 64 -> 128 bit carryless multiplication. +To multiply 128-bit values we write A = (A1, A0) and B = (B1, B0) in two 64-bit halves. + +The schoolbook multiplication is computed by + (C, D) = (A1 * B1)x^128 + (A1 * B0 + A0 * B1)x^64 + (A0 * B0) +This requires four PCLMULQDQ instructions. The middle 128-bit result has to be shifted +left and right, and each half added to the upper and lower 128-bit result to get (C,D). + +Alternatively, the middle 128-bit intermediate result can be computed using Karatsuba: + (A1*B0 + A0*B1) = (A1 + A0) * (B1 + B0) + (A1*B1) + (A0*B0) +This requires only one PCLMULQDQ instruction to multiply (A1 + A0) by (B1 + B0) +as the other two products are already computed. +Whether this is faster depends on the relative speed of shift/xor versus PCLMULQDQ.
+ +Both multiplication algorithms produce three 128-bit intermediate results (R1, Rmid, R0), +with the full result defined by R1 x^128 + Rmid x^64 + R0. +If we do Multiply-Accumulate then we can accumulate the three 128-bit intermediate results +directly. As there are no carries, there is no overflow, and the combining of the three +intermediate results into a 256-bit result can be shared amongst all multiplications. + + +Modulo reduction: +We use << and >> to denote shifts on 128-bit values. +The modulo reduction can now be done as follows: +given a 256-bit value (C,D) representing C x^128 + D we compute + (T1,T0) := C + C*x + C * x^2 + C * x^7 + R := D + T0 + T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) + +(T1,T0) is just the value C x^128 reduced one step modulo p(x). The value T1 is at most 7 bits, +so the next step of the reduction, which computes the result R, is easy. The +expression T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) is just T1 * x^128 reduced modulo p(x). + +Let's first get rid of the polynomial arithmetic and write this completely using shifts on +128-bit values. + +T0 := C + (C << 1) + (C << 2) + (C << 7) +T1 := (C >> 127) + (C >> 126) + (C >> 121) +R := D + T0 + T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) + +We can optimize this by rewriting the equations + +T2 := T1 + C + = C + (C>>127) + (C>>126) + (C>>121) +R = D + T0 + T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) + = D + C + (C << 1) + (C << 2) + (C << 7) + T1 + (T1 << 1) + (T1 << 2) + (T1 << 7) + = D + T2 + (T2 << 1) + (T2 << 2) + (T2 << 7) + +Thus +T2 = C + (C>>127) + (C>>126) + (C>>121) +R = D + T2 + (T2 << 1) + (T2 << 2) + (T2 << 7) + +Gets the right result and uses only 6 shifts. + +The SSE instruction set does not implement bit-shifts of 128-bit values. Instead, we will +use bit-shifts of the 32-bit subvalues, and byte shifts (shifts by a multiple of 8 bits) +on the full 128-bit values. +We use the <<<< and >>>> operators to denote shifts on 32-bit subwords.
+ +We can now do the modulo reduction by + +t1 := (C >> 127) = (C >>>> 31) >> 96 +t2 := (C >> 126) = (C >>>> 30) >> 96 +t3 := (C >> 121) = (C >>>> 25) >> 96 +T2 = C + t1 + t2 + t3 + +left-shifts in the computation of R are a bit more involved as we have to move bits from +one subword to the next + +u1 := (T2 << 1) = (T2 <<<< 1) + ((T2 >>>> 31) << 32) +u2 := (T2 << 2) = (T2 <<<< 2) + ((T2 >>>> 30) << 32) +u3 := (T2 << 7) = (T2 <<<< 7) + ((T2 >>>> 25) << 32) +R = D + T2 + u1 + u2 + u3 + +We can eliminate some common subexpressions. For any k we have +(T2 >>>> k) = ((C + r) >>>> k) +where r is a 7-bit value. If k>7 then this is equal to (C >>>> k). This means that +the value (T2 >>>> 31) is equal to (C >>>> 31) so we don't have to compute it again. + +So we can rewrite our formulas as +t4 := (C >>>> 31) +t5 := (C >>>> 30) +t6 := (C >>>> 25) +ts = t4 + t5 + t6 +T2 = C + (ts >> 96) + +Note that ts = (C >>>> 31) + (C >>>> 30) + (C >>>> 25) +which is equal to (T2 >>>> 31) + (T2 >>>> 30) + (T2 >>>> 25) + +R = D + T2 + u1 + u2 + u3 + = D + T2 + (T2 <<<< 1) + (T2 <<<< 2) + (T2 <<<< 7) + (ts << 32) + +All together, we can do the modulo reduction using the following formulas + +ts := (C >>>> 31) + (C >>>> 30) + (C >>>> 25) +T2 := C + (ts >> 96) +R = D + T2 + (T2 <<<< 1) + (T2 <<<< 2) + (T2 <<<< 7) + (ts << 32) + +Using a total of 16 operations. (6 subword shifts, 2 byte shifts, and 8 additions) + +Reversed bit order: +There is one more complication. GHASH uses the bits in the reverse order from normal representation. +The bits b_0, b_1, ..., b_127 represent the polynomial b_0 + b_1 * x + ... + b_127 * x^127. +This means that the most significant bit in each byte is actually the least significant bit in the +polynomial. + +SSE CPUs use the LSBFirst convention. This means that the bits b_0, b_1, ..., b_127 of the polynomial +end up at positions 7, 6, 5, ..., 1, 0, 15, 14, ..., 9, 8, 23, 22, ... of our XMM register. 
+This is obviously not a useful representation to do arithmetic in. +The first step is to BSWAP the value so that the bits appear in pure reverse order. +That is at least algebraically useful. + +To compute the multiplication we use the fact that GF(2)[x] multiplication has no carries and +thus no preference for bit order. After the BSWAP we don't have the values A and B, but rather +rev(A) and rev(B) where rev() is a function that reverses the bit order. We can now compute + + rev(A) * rev(B) = rev( A*B ) >> 1 + +where the shift operator is on the 256-bit product. + +The modulo reduction remains the same, except that we change all the shifts to be the other direction. + +This gives us finally the outline of our multiplication: + +- Apply BSWAP to all values loaded from memory. + A := BSWAP( Abytes ) + B := BSWAP( Bbytes ) +- Compute the 256-bit product, possibly using Karatsuba. + (P1, P0) := A * B // 128x128 carryless multiplication +- Shift the result left one bit. + (Q1, Q0) := (P1, P0) << 1 + which is computed as + Q0 = (P0 <<<< 1) + (P0 >>>> 31) << 32 + Q1 = (P1 <<<< 1) + (P1 >>>> 31) << 32 + (P0 >>>> 31) >> 96 +- Perform the modulo reduction, with reversed bit order + ts := (Q0 <<<< 31) + (Q0 <<<< 30) + (Q0 <<<< 25) + T2 := Q0 + (ts << 96) + R = Q1 + T2 + (T2 >>>> 1) + (T2 >>>> 2) + (T2 >>>> 7) + (ts >> 32) + +Future work: +It might be possible to construct a faster solution by merging the leftshift of (P1,P0) +with the modulo reduction. 
+ +*/ + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#define SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS 32 + +#define GHASH_H_POWER( ghashTable, ind ) ( (ghashTable)[ SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS - (ind)].m128i ) +#define GHASH_Hx_POWER( ghashTable, ind ) ( (ghashTable)[2*SYMCRYPT_GHASH_PCLMULQDQ_HPOWERS - (ind)].m128i ) + +// +// We define a few macros +// + +// +// CLMUL_4 multiplies two operands into three intermediate results using 4 pclmulqdq instructions +// +#define CLMUL_4( opA, opB, resl, resm, resh ) \ +{ \ + resl = _mm_clmulepi64_si128( opA, opB, 0x00 ); \ + resm = _mm_xor_si128( _mm_clmulepi64_si128( opA, opB, 0x01 ), _mm_clmulepi64_si128( opA, opB, 0x10 ) ); \ + resh = _mm_clmulepi64_si128( opA, opB, 0x11 ); \ +}; + +// +// CLMUL_3 multiplies two operands into three intermediate results using 3 pclmulqdq instructions. +// The second operand has a pre-computed difference of the two halves. +// This uses Karatsuba, but we delay xorring the high and low piece into the middle piece. +// +#define CLMUL_3( opA, opB, opBx, resl, resm, resh ) \ +{ \ + __m128i _tmpA; \ + resl = _mm_clmulepi64_si128( opA, opB, 0x00 ); \ + resh = _mm_clmulepi64_si128( opA, opB, 0x11 ); \ + _tmpA = _mm_xor_si128( opA, _mm_srli_si128( opA, 8 ) ); \ + resm = _mm_clmulepi64_si128( _tmpA, opBx, 0x00 ); \ +}; +// +// CLMUL_X_3 is as CLMUL_3 only it takes precomputed differences of both multiplicands. 
+// +#define CLMUL_X_3( opA, opAx, opB, opBx, resl, resm, resh ) \ +{ \ + resl = _mm_clmulepi64_si128( opA, opB, 0x00 ); \ + resh = _mm_clmulepi64_si128( opA, opB, 0x11 ); \ + resm = _mm_clmulepi64_si128( opAx, opBx, 0x00 ); \ +}; + +// +// Post-process the CLMUL_3 result to be compatible with the CLMUL_4 +// +#define CLMUL_3_POST( resl, resm, resh ) \ + resm = _mm_xor_si128( resm, _mm_xor_si128( resl, resh ) ); + +// +// Multiply-accumulate using CLMUL_4 +// +#define CLMUL_ACC_4( opA, opB, resl, resm, resh ) \ +{\ + __m128i _tmpl, _tmpm, _tmph;\ + CLMUL_4( opA, opB, _tmpl, _tmpm, _tmph );\ + resl = _mm_xor_si128( resl, _tmpl ); \ + resm = _mm_xor_si128( resm, _tmpm ); \ + resh = _mm_xor_si128( resh, _tmph ); \ +}; + +// +// Multiply-accumulate using CLMUL_3 +// +#define CLMUL_ACC_3( opA, opB, opBx, resl, resm, resh ) \ +{\ + __m128i _tmpl, _tmpm, _tmph;\ + CLMUL_3( opA, opB, opBx, _tmpl, _tmpm, _tmph );\ + resl = _mm_xor_si128( resl, _tmpl ); \ + resm = _mm_xor_si128( resm, _tmpm ); \ + resh = _mm_xor_si128( resh, _tmph ); \ +}; +#define CLMUL_ACC_3_Ymm( opA, opB, opBx, resl, resm, resh ) \ +{\ + __m256i _tmpl, _tmpm, _tmph;\ + __m256i _tmpA; \ + _tmpl = _mm256_clmulepi64_epi128( opA, opB, 0x00 ); \ + _tmph = _mm256_clmulepi64_epi128( opA, opB, 0x11 ); \ + _tmpA = _mm256_xor_si256( opA, _mm256_srli_si256( opA, 8 ) ); \ + _tmpm = _mm256_clmulepi64_epi128( _tmpA, opBx, 0x00 ); \ + resl = _mm256_xor_si256( resl, _tmpl ); \ + resm = _mm256_xor_si256( resm, _tmpm ); \ + resh = _mm256_xor_si256( resh, _tmph ); \ +}; + + +// +// Convert the 3 intermediate results to a 256-bit result, +// and do the modulo reduction. 
+#define MODREDUCE( vMultiplicationConstant, rl, rm, rh, res ) \ +{\ + __m128i _T0, _T1; \ +\ + /* multiply rl by constant which is (rev(0x87) << 1) - we'll eor the lost high bit in manually */ \ + _T0 = _mm_clmulepi64_si128( rl, vMultiplicationConstant, 0x00 ); \ +\ + /* we want the high 64b of rl to align with the low 64b of rm, because we haven't merged rm into rl and rh */ \ + /* we want the low 64b of rl to align with the high 64b of rm, because we lost the high bit in the previous pmull */ \ + rl = _mm_shuffle_epi32( rl, _MM_SHUFFLE( 1, 0, 3, 2 ) ); \ +\ + rm = _mm_xor_si128( rm, _T0 ); \ + rm = _mm_xor_si128( rm, rl ); \ +\ + /* almost same again to fold rm into rh, but bit 63 needs no more multiplication and the result ultimately needs shifting left by 1 */ \ + /* pre-shift bottom of rm left by 1 and accumulate the result when the other parts are aligned */ \ + _T0 = _mm_clmulepi64_si128( _mm_slli_epi64( rm, 1 ), vMultiplicationConstant, 0x00 ); \ +\ + rm = _mm_shuffle_epi32( rm, _MM_SHUFFLE( 1, 0, 3, 2 ) ); \ + res = _mm_xor_si128( rh, rm ); \ +\ + /* rotate res left by 1 and accumulate the aligned parts */ \ + _T1 = _mm_slli_epi32( res, 1 ); \ + res = _mm_srli_epi32( res, 31 ); \ +\ + _T0 = _mm_xor_si128( _T0, _T1 ); \ + res = _mm_shuffle_epi32( res, _MM_SHUFFLE( 2, 1, 0, 3 ) ); \ +\ + res = _mm_xor_si128( res, _T0 ); \ +}; + +// +// See the large comment above on how this is done. +// When we want to do MODREDUCE in parallel with other work, making use of pclmuldq to reduce +// total instruction count (and register pressure) is beneficial. When testing on Haswell, +// using the newer approach is beneficial. Keeping the old approach around in case we have significant +// regression on older platforms. 
+// +#define MODREDUCE_OLD( rl, rm, rh, res ) \ +{\ + __m128i _T0, _T1, _T2, _Q0, _Q1; \ + rl = _mm_xor_si128( rl, _mm_slli_si128( rm, 8 ) ); \ + rh = _mm_xor_si128( rh, _mm_srli_si128( rm, 8 ) ); \ +\ + _Q0 = _mm_slli_epi32( rl, 1 ); \ + _Q1 = _mm_slli_epi32( rh, 1 ); \ +\ + _T0 = _mm_srli_epi32( rl, 31 ); \ + _T1 = _mm_srli_epi32( rh, 31 ); \ +\ + _T1 = _mm_alignr_epi8( _T1, _T0, 12 ); \ + _T0 = _mm_slli_si128( _T0, 4 ); \ +\ + _Q0 = _mm_xor_si128( _Q0, _T0 ); \ + _Q1 = _mm_xor_si128( _Q1, _T1 ); \ +\ + _T0 = _mm_slli_epi32( _Q0, 31 ); \ + _T1 = _mm_slli_epi32( _Q0, 30 ); \ + _T2 = _mm_slli_epi32( _Q0, 25 ); \ + _T0 = _mm_xor_si128( _T0, _T1 ); \ + _T0 = _mm_xor_si128( _T0, _T2 ); \ +\ + _T1 = _mm_slli_si128( _T0, 12 ); \ +\ + _T2 = _mm_xor_si128( _Q0, _T1 ); \ +\ + res = _mm_xor_si128( _Q1, _T2 ); \ + _T1 = _mm_srli_si128( _T0, 4 ); \ + res = _mm_xor_si128( res, _T1 ); \ +\ + _T0 = _mm_srli_epi32( _T2, 1 ); \ + _T1 = _mm_srli_epi32( _T2, 2 ); \ + _T2 = _mm_srli_epi32( _T2, 7 ); \ +\ + _T1 = _mm_xor_si128( _T0, _T1 ); \ + res = _mm_xor_si128( res, _T2 ); \ + res = _mm_xor_si128( res, _T1 ); \ +}; + +#endif // CPU_X86 || CPU_AMD64 + +#if SYMCRYPT_CPU_ARM64 + +#define SYMCRYPT_GHASH_PMULL_HPOWERS 32 + +#define GHASH_H_POWER( ghashTable, ind ) ( (ghashTable)[ SYMCRYPT_GHASH_PMULL_HPOWERS - (ind)].n128 ) +#define GHASH_Hx_POWER( ghashTable, ind ) ( (ghashTable)[2*SYMCRYPT_GHASH_PMULL_HPOWERS - (ind)].n128 ) + +#if SYMCRYPT_MS_VC +#ifndef vshl_n_u64 +#define vshl_n_u64(src1, src2) neon_shlis64(src1, src2) +#endif +#endif +// +// CLMUL_4 multiplies two operands into three intermediate results using 4 pmull instructions +// +#define CLMUL_4( opA, opB, resl, resm, resh ) \ +{ \ + __n128 _tmp; \ + resl = vmullq_p64( opA, opB ); \ + _tmp = vextq_u8( opA, opA, 8 ); \ + resm = veorq_u8( vmullq_p64( opB, _tmp ), vmull_high_p64( opB, _tmp ) );\ + resh = vmull_high_p64( opA, opB ); \ +}; + +// +// CLMUL_3 multiplies two operands into three intermediate results using 3 pmull 
instructions. +// The second operand has a pre-computed difference of the two halves. +// This uses Karatsuba, but we delay xorring the high and low piece into the middle piece. +// +#define CLMUL_3( opA, opB, opBx, resl, resm, resh ) \ +{ \ + __n128 _tmpA; \ + resl = vmullq_p64( opA, opB ); \ + resh = vmull_high_p64( opA, opB ); \ + _tmpA = veorq_u8( opA, vextq_u8( opA, opA, 8 ) ); \ + resm = vmullq_p64( _tmpA, opBx ); \ +}; +// +// CLMUL_X_3 is as CLMUL_3 only it takes precomputed differences of both multiplicands +// +#define CLMUL_X_3( opA, opAx, opB, opBx, resl, resm, resh ) \ +{ \ + resl = vmullq_p64( opA, opB ); \ + resh = vmull_high_p64( opA, opB ); \ + resm = vmullq_p64( opAx, opBx ); \ +}; + +// +// Post-process the CLMUL_3 result to be compatible with the CLMUL_4 +// +#define CLMUL_3_POST( resl, resm, resh ) \ + resm = veorq_u8( resm, veorq_u8( resl, resh ) ); + +// +// Multiply-accumulate using CLMUL_4 +// +#define CLMUL_ACC_4( opA, opB, resl, resm, resh ) \ +{\ + __n128 _tmpl, _tmpm, _tmph;\ + CLMUL_4( opA, opB, _tmpl, _tmpm, _tmph );\ + resl = veorq_u8( resl, _tmpl ); \ + resm = veorq_u8( resm, _tmpm ); \ + resh = veorq_u8( resh, _tmph ); \ +}; + +// +// Multiply-accumulate two operands into 3 accumulators. +// Takes the multiplicands and the pre-computed differences of the two halves of both multiplicands. +// +#define CLMUL_ACCX_3( opA, opAx, opB, opBx, resl, resm, resh ) \ +{\ + __n128 _tmpl, _tmpm, _tmph;\ + CLMUL_X_3( opA, opAx, opB, opBx, _tmpl, _tmpm, _tmph ); \ + resl = veorq_u8( resl, _tmpl ); \ + resm = veorq_u8( resm, _tmpm ); \ + resh = veorq_u8( resh, _tmph ); \ +}; + + +// +// Convert the 3 intermediate results to a 256-bit result, +// and do the modulo reduction. +// See the large comment above on how this is done. 
+// +#define MODREDUCE( vMultiplicationConstant, rl, rm, rh, res ) \ +{\ + __n128 _T0, _T1; \ +\ + /* multiply rl by constant which is (rev(0x87) << 1) - we'll eor the lost high bit in manually */ \ + _T0 = vmull_p64( vget_low_p64(rl), vMultiplicationConstant ); \ +\ + /* we want the high 64b of rl to align with the low 64b of rm, because we haven't merged rm into rl and rh */ \ + /* we want the low 64b of rl to align with the high 64b of rm, because we lost the high bit in the previous pmull */ \ + rl = vextq_u8( rl, rl, 8 ); \ +\ + rm = veorq_u8( rm, _T0 ); \ + rm = veorq_u8( rm, rl ); \ +\ + /* almost same again to fold rm into rh, but bit 63 needs no more multiplication and the result ultimately needs shifting left by 1 */ \ + /* pre-shift bottom of rm left by 1 and accumulate the result when the other parts are aligned */ \ + _T0 = vmull_p64( vshl_n_u64(vget_low_p64(rm), 1), vMultiplicationConstant ); \ +\ + rm = vextq_u8( rm, rm, 8 ); \ + res = veorq_u8( rh, rm ); \ +\ + /* rotate res left by 1 and accumulate the aligned parts */ \ + _T1 = vshlq_n_u32( res, 1 ); \ + res = vshrq_n_u32( res, 31 ); \ +\ + _T0 = veorq_u8( _T0, _T1 ); \ + res = vextq_u8( res, res, 12 ); \ +\ + res = veorq_u8( res, _T0 ); \ +}; + +#define REVERSE_BYTES( _in, _out )\ +{\ + __n128 _t;\ + _t = vrev64q_u8( _in ); \ + _out = vextq_u8( _t, _t, 8 ); \ +} + +#endif // CPU_ARM64 diff --git a/libs/symcrypt/lib/hash.c b/libs/symcrypt/lib/hash.c new file mode 100644 index 00000000000..a0420c35964 --- /dev/null +++ b/libs/symcrypt/lib/hash.c @@ -0,0 +1,216 @@ +// +// hash.c generic code used in many hash implementations. +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#include "precomp.h" + +VOID +SYMCRYPT_CALL +SymCryptHashAppendInternal( + _In_ PCSYMCRYPT_HASH pHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState, + _In_reads_bytes_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + UINT32 bytesInBuffer; + UINT32 freeInBuffer; + SIZE_T tmp; + + SYMCRYPT_CHECK_MAGIC( pState ); + + pState->dataLengthL += cbData; + if( pState->dataLengthL < cbData ) { + pState->dataLengthH ++; // This is almost-unreachable code as it requires 2^64 bytes to be hashed. + } + + bytesInBuffer = pState->bytesInBuffer; + + // + // If previous data in buffer, buffer new input and transform if possible. + // + if( bytesInBuffer > 0 ) + { + SYMCRYPT_ASSERT( pHash->inputBlockSize > bytesInBuffer ); + + freeInBuffer = pHash->inputBlockSize - bytesInBuffer; + if( cbData < freeInBuffer ) + { + // + // All the data will fit in the buffer. + // We don't do anything here. + // As cbData < inputBlockSize the bulk data processing is skipped, + // and the data will be copied to the buffer at the end + // of this code. + } else { + // + // Enough data to fill the whole buffer & process it + // + memcpy(&pState->buffer[bytesInBuffer], pbData, freeInBuffer); + pbData += freeInBuffer; + cbData -= freeInBuffer; + (*pHash->appendBlockFunc)( (PBYTE)pState + pHash->chainOffset, &pState->buffer[0], pHash->inputBlockSize, &tmp ); + + bytesInBuffer = 0; + } + } + + // + // Internal buffer is empty; process all remaining whole blocks in the input + // + if( cbData >= pHash->inputBlockSize ) + { + (*pHash->appendBlockFunc)( (PBYTE)pState + pHash->chainOffset, pbData, cbData, &tmp ); + SYMCRYPT_ASSERT( tmp < pHash->inputBlockSize ); + pbData += cbData - tmp; + cbData = tmp; + } + + SYMCRYPT_ASSERT( cbData < pHash->inputBlockSize ); + + // + // buffer remaining input if necessary. 
+ // + if( cbData > 0 ) + { + memcpy( &pState->buffer[bytesInBuffer], pbData, cbData ); + bytesInBuffer += (UINT32) cbData; + } + + pState->bytesInBuffer = bytesInBuffer; +} + +VOID +SYMCRYPT_CALL +SymCryptHashCommonPaddingMd4Style( + _In_ PCSYMCRYPT_HASH pHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState ) +{ + SIZE_T tmp; + SIZE_T bytesInBuffer = pState->bytesInBuffer; + + SYMCRYPT_CHECK_MAGIC( pState ); + SYMCRYPT_ASSERT( pHash->inputBlockSize == 64 ); + SYMCRYPT_ASSERT( bytesInBuffer == (pState->dataLengthL & 0x3f) ); + + // + // The buffer is never completely full, so we can always put the first + // padding byte in. + // + pState->buffer[bytesInBuffer++] = 0x80; + + if( bytesInBuffer > 64-8 ) { + // + // No room for the rest of the padding. Pad with zeroes & process block + // bytesInBuffer is at most 64, so we do not have an integer underflow + // + SymCryptWipe( &pState->buffer[bytesInBuffer], 64-bytesInBuffer ); + (*pHash->appendBlockFunc)( (PBYTE)pState + pHash->chainOffset, pState->buffer, 64, &tmp ); + SYMCRYPT_ASSERT( tmp == 0 ); + bytesInBuffer = 0; + } + + // + // Set rest of padding + // At this point bytesInBuffer <= 64-8, so we don't have an underflow + // We wipe to the end of the buffer as it is 16-aligned, + // and it is faster to wipe to an aligned point + // + SymCryptWipe( &pState->buffer[bytesInBuffer], 64-bytesInBuffer ); + SYMCRYPT_STORE_LSBFIRST64( &pState->buffer[64-8], pState->dataLengthL * 8 ); + + // + // Process the final block + // + (*pHash->appendBlockFunc)( (PBYTE)pState + pHash->chainOffset, pState->buffer, 64, &tmp ); +} + + + +SIZE_T +SYMCRYPT_CALL +SymCryptHashResultSize( _In_ PCSYMCRYPT_HASH pHash ) +{ + return pHash->resultSize; +} + + +SIZE_T +SYMCRYPT_CALL +SymCryptHashInputBlockSize( _In_ PCSYMCRYPT_HASH pHash ) +{ + return pHash->inputBlockSize; +} + +SIZE_T +SYMCRYPT_CALL +SymCryptHashStateSize( _In_ PCSYMCRYPT_HASH pHash ) +{ + return pHash->stateSize; +} + + +VOID +SYMCRYPT_CALL +SymCryptHash( + _In_ 
PCSYMCRYPT_HASH pHash, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_MIN( cbResult, pHash->resultSize ) ) PBYTE pbResult, + SIZE_T cbResult ) +{ + SYMCRYPT_HASH_STATE hash; + + _Analysis_assume_( pHash->stateSize <= sizeof( hash ) ); + SymCryptHashInit( pHash, &hash ); + SymCryptHashAppend( pHash, &hash, pbData, cbData ); + SymCryptHashResult( pHash, &hash, pbResult, cbResult ); + SymCryptWipe( &hash, pHash->stateSize ); +} + +VOID +SYMCRYPT_CALL +SymCryptHashInit( + _In_ PCSYMCRYPT_HASH pHash, + _Out_writes_bytes_( pHash->stateSize ) PVOID pState ) +{ + (*pHash->initFunc)( pState ); +} + +VOID +SYMCRYPT_CALL +SymCryptHashAppend( + _In_ PCSYMCRYPT_HASH pHash, + _Inout_updates_bytes_( pHash->stateSize ) PVOID pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + (*pHash->appendFunc)( pState, pbData, cbData ); +} + +VOID +SYMCRYPT_CALL +SymCryptHashResult( + _In_ PCSYMCRYPT_HASH pHash, + _Inout_updates_bytes_( pHash->stateSize ) PVOID pState, + _Out_writes_( SYMCRYPT_MIN( cbResult, pHash->resultSize ) ) PBYTE pbResult, + SIZE_T cbResult ) +{ + BYTE buf[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + + _Analysis_assume_( pHash->resultSize <= SYMCRYPT_HASH_MAX_RESULT_SIZE ); + + (*pHash->resultFunc)( pState, buf ); + memcpy( pbResult, buf, SYMCRYPT_MIN( cbResult, pHash->resultSize )); + SymCryptWipe( buf, pHash->resultSize ); +} + +VOID +SYMCRYPT_CALL +SymCryptHashStateCopy( + _In_ PCSYMCRYPT_HASH pHash, + _In_reads_( pHash->stateSize ) PCVOID pSrc, + _Out_writes_( pHash->stateSize ) PVOID pDst) +{ + (*pHash->stateCopyFunc)( pSrc, pDst ); +} diff --git a/libs/symcrypt/lib/hash_buffer_pattern.c b/libs/symcrypt/lib/hash_buffer_pattern.c new file mode 100644 index 00000000000..c0d5dd19c92 --- /dev/null +++ b/libs/symcrypt/lib/hash_buffer_pattern.c @@ -0,0 +1,75 @@ +// +// hash_buffer_pattern.c +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+//
+
+#if 0
+#pragma makedep header
+#endif
+
+/*
+    The statements below form the body of an Append routine shaped like this:
+
+SymCryptXxxAppend( _Inout_                      SYMCRYPT_Xxx_STATE * state,
+                   _In_reads_bytes_( cbData )   PCBYTE               pbData,
+                                                SIZE_T               cbData )
+{
+    <Set up a SIZE_T variable 'bytesInBuffer' that contains the # bytes in the buffer>
+*/
+
+    //
+    // Reduce bytesInBuffer to an offset inside one input block, so the
+    // arithmetic below can never run into an integer overflow.
+    //
+    bytesInBuffer &= SYMCRYPT_XXX_INPUT_BLOCK_SIZE - 1;
+
+    //
+    // Step 1: a partially filled buffer is topped up first.
+    // NOTE(review): AppendBlocks is called with three arguments in this
+    // fragment, while hmac_pattern.c passes a fourth out-parameter; confirm
+    // which signature is current before relying on this pattern.
+    //
+    if( bytesInBuffer > 0 )
+    {
+        SIZE_T cbFree = SYMCRYPT_XXX_INPUT_BLOCK_SIZE - bytesInBuffer;
+
+        if( cbData >= cbFree )
+        {
+            //
+            // The input completes the buffered block: fill it and process it.
+            //
+            memcpy( &state->buffer[bytesInBuffer], pbData, cbFree );
+            pbData += cbFree;
+            cbData -= cbFree;
+            SYMCRYPT_XxxAppendBlocks( &state->chain, state->buffer, SYMCRYPT_XXX_INPUT_BLOCK_SIZE );
+
+            //
+            // The buffer is empty again, so any trailing input copied in
+            // step 3 must land at the start of the buffer.
+            //
+            bytesInBuffer = 0;
+        }
+        //
+        // Otherwise everything fits in the free space. Nothing to do here:
+        // cbData is smaller than one block, so step 2 is skipped and step 3
+        // copies the data behind the bytes already buffered.
+        //
+    }
+
+    //
+    // Step 2: hash all whole blocks straight from the caller's input.
+    //
+    if( cbData >= SYMCRYPT_XXX_INPUT_BLOCK_SIZE )
+    {
+        SIZE_T cbWholeBlocks = cbData & ~(SIZE_T)(SYMCRYPT_XXX_INPUT_BLOCK_SIZE - 1);
+
+        SYMCRYPT_XxxAppendBlocks( &state->chain, pbData, cbWholeBlocks );
+        pbData += cbWholeBlocks;
+        cbData -= cbWholeBlocks;
+    }
+
+    //
+    // Step 3: keep whatever is left (less than one block) in the buffer.
+    //
+    if( cbData > 0 )
+    {
+        memcpy( &state->buffer[bytesInBuffer], pbData, cbData );
+    }
+
+/*
+}
+*/
diff --git a/libs/symcrypt/lib/hash_pattern.c b/libs/symcrypt/lib/hash_pattern.c
new file mode 100644
index 00000000000..57920c7cb71
--- /dev/null
+++ b/libs/symcrypt/lib/hash_pattern.c
@@ -0,0 +1,39 @@
+//
+// hash_pattern.c
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#if 0
+#pragma makedep header
+#endif
+
+//
+// This is a file that is #included to define the
+// all-in-one hash function.
+//
+
+
+
+//
+// One-shot hash: runs Init, Append and Result on a local state and writes
+// the digest to pbResult.
+//
+SYMCRYPT_NOINLINE
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_Xxx(
+    _In_reads_( cbData )                                        PCBYTE  pbData,
+                                                                SIZE_T  cbData,
+    _Out_writes_( CONCAT3( SYMCRYPT_, ALG, _RESULT_SIZE ) )     PBYTE   pbResult )
+{
+    SYMCRYPT_XXX_STATE hashState;
+
+    SYMCRYPT_XxxInit( &hashState );
+    SYMCRYPT_XxxAppend( &hashState, pbData, cbData );
+    SYMCRYPT_XxxResult( &hashState, pbResult );
+}
+
+//
+// Duplicates a hash state: checks the source magic, copies the whole
+// structure by assignment, then sets the magic on the destination.
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxStateCopy(
+    _In_    const SYMCRYPT_XXX_STATE *  pSrc,
+    _Out_   SYMCRYPT_XXX_STATE *        pDst )
+{
+    SYMCRYPT_CHECK_MAGIC( pSrc );
+    *pDst = *pSrc;
+    SYMCRYPT_SET_MAGIC( pDst );
+}
diff --git a/libs/symcrypt/lib/hkdf.c b/libs/symcrypt/lib/hkdf.c
new file mode 100644
index 00000000000..7a188f9b674
--- /dev/null
+++ b/libs/symcrypt/lib/hkdf.c
@@ -0,0 +1,229 @@
+//
+// hkdf.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+//
+// This module contains the routines to implement the HKDF
+// function for the TLS protocol 1.3. It is used in
+// the protocol's key derivation function.
+//
+//
+
+#include "precomp.h"
+
+//
+// Extract-then-expand key setup.
+// Derives the PRK from (salt, IKM) with SymCryptHkdfExtractPrk and expands
+// it into pExpandedKey with SymCryptHkdfPrkExpandKey. The intermediate PRK
+// buffer is wiped on every exit path.
+// Returns SYMCRYPT_NO_ERROR, or the first error reported by either step.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptHkdfExpandKey(
+    _Out_                   PSYMCRYPT_HKDF_EXPANDED_KEY pExpandedKey,
+    _In_                    PCSYMCRYPT_MAC              macAlgorithm,
+    _In_reads_(cbIkm)       PCBYTE                      pbIkm,
+                            SIZE_T                      cbIkm,
+    _In_reads_opt_(cbSalt)  PCBYTE                      pbSalt,
+                            SIZE_T                      cbSalt )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    SYMCRYPT_ALIGN BYTE rbPrk[SYMCRYPT_MAC_MAX_RESULT_SIZE] = { 0 };
+
+    SYMCRYPT_ASSERT( macAlgorithm->expandedKeySize <= sizeof( pExpandedKey->macKey ) );
+
+    scError = SymCryptHkdfExtractPrk( macAlgorithm, pbIkm, cbIkm, pbSalt, cbSalt, rbPrk, macAlgorithm->resultSize );
+    if (scError != SYMCRYPT_NO_ERROR)
+    {
+        goto cleanup;
+    }
+
+    scError = SymCryptHkdfPrkExpandKey( pExpandedKey, macAlgorithm, rbPrk, macAlgorithm->resultSize );
+    if (scError != SYMCRYPT_NO_ERROR)
+    {
+        goto cleanup;
+    }
+
+cleanup:
+    SymCryptWipeKnownSize(&rbPrk[0], sizeof(rbPrk));
+
+    return scError;
+}
+
+//
+// HKDF extract step: PRK = MAC(key = salt, data = IKM).
+// cbPrk must equal macAlgorithm->resultSize; otherwise nothing is computed
+// and SYMCRYPT_INVALID_ARGUMENT is returned. A failure of the MAC key
+// expansion is returned unchanged. The local expanded MAC key is wiped
+// before returning.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptHkdfExtractPrk(
+    _In_                    PCSYMCRYPT_MAC  macAlgorithm,
+    _In_reads_(cbIkm)       PCBYTE          pbIkm,
+                            SIZE_T          cbIkm,
+    _In_reads_opt_(cbSalt)  PCBYTE          pbSalt,
+                            SIZE_T          cbSalt,
+    _Out_writes_(cbPrk)     PBYTE           pbPrk,
+                            SIZE_T          cbPrk )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    SYMCRYPT_MAC_STATE state;
+    SYMCRYPT_MAC_EXPANDED_KEY key;
+
+    // Ensure that pbPrk is the correct size
+    if (cbPrk != macAlgorithm->resultSize)
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Calculation of PRK = HMAC-Hash(salt, IKM)
+    scError = macAlgorithm->expandKeyFunc( &key, pbSalt, cbSalt );
+    if (scError != SYMCRYPT_NO_ERROR)
+    {
+        goto cleanup;
+    }
+
+    macAlgorithm->initFunc( &state, &key );
+    macAlgorithm->appendFunc( &state, pbIkm, cbIkm );
+    macAlgorithm->resultFunc( &state, pbPrk );
+
+cleanup:
+    SymCryptWipeKnownSize(&key, sizeof(key));
+
+    return scError;
+}
+
+//
+// Builds an HKDF expanded key from an already extracted PRK: records the
+// MAC algorithm in pExpandedKey->macAlg and expands the PRK into
+// pExpandedKey->macKey. Returns the result of the MAC key expansion.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptHkdfPrkExpandKey(
+    _Out_               PSYMCRYPT_HKDF_EXPANDED_KEY pExpandedKey,
+    _In_                PCSYMCRYPT_MAC              macAlgorithm,
+    _In_reads_(cbPrk)   PCBYTE                      pbPrk,
+                        SIZE_T                      cbPrk )
+{
+    SYMCRYPT_ASSERT( macAlgorithm->expandedKeySize <= sizeof( pExpandedKey->macKey ) );
+
+    pExpandedKey->macAlg = macAlgorithm;
+    return macAlgorithm->expandKeyFunc( &pExpandedKey->macKey, pbPrk, cbPrk );
+}
+
+//
+// HKDF expand step: fills pbResult with cbResult bytes of
+// T(1) | T(2) | ..., where T(i) = MAC(PRK, T(i-1) | info | i) and T(0) is
+// empty. The last block is truncated to the bytes still requested.
+// Returns SYMCRYPT_WRONG_DATA_SIZE when cbResult exceeds 255 MAC results
+// (the block counter is a single byte), SYMCRYPT_NO_ERROR otherwise.
+// The buffer holding the last T(i) is wiped before returning.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptHkdfDerive(
+    _In_                    PCSYMCRYPT_HKDF_EXPANDED_KEY    pExpandedKey,
+    _In_reads_opt_(cbInfo)  PCBYTE                          pbInfo,
+                            SIZE_T                          cbInfo,
+    _Out_writes_(cbResult)  PBYTE                           pbResult,
+                            SIZE_T                          cbResult)
+{
+
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    SYMCRYPT_MAC_STATE state;
+
+    PCSYMCRYPT_MAC pMacAlgorithm = pExpandedKey->macAlg;
+
+    SYMCRYPT_ALIGN BYTE rbPartialResult[SYMCRYPT_MAC_MAX_RESULT_SIZE];
+    BYTE * pbCurr = pbResult;
+
+    SIZE_T cbMacResultSize = pMacAlgorithm->resultSize;
+
+    BYTE cntr = 0x01;
+
+    // Check that cbResult <= 255*HashLen
+    if (cbResult > 0xff * cbMacResultSize)
+    {
+        scError = SYMCRYPT_WRONG_DATA_SIZE;
+        goto cleanup;
+    }
+
+    // In the first iteration T(0) is the empty string
+    // Calculate T(1) = HMAC-Hash(PRK, T(0) | info | 0x01)
+    //
+    // The MAC key was expanded into pExpandedKey->macKey by
+    // SymCryptHkdfPrkExpandKey, so that member - not the enclosing HKDF
+    // key structure - is what the MAC init routine must receive.
+    pMacAlgorithm->initFunc( &state, &pExpandedKey->macKey );
+    pMacAlgorithm->appendFunc( &state, pbInfo, cbInfo );
+    pMacAlgorithm->appendFunc( &state, &cntr, sizeof(cntr) );
+    pMacAlgorithm->resultFunc( &state, rbPartialResult );
+
+    // Store the result in the output buffer
+    memcpy(pbCurr, rbPartialResult, SYMCRYPT_MIN(cbResult, cbMacResultSize));
+    if (cbResult <= cbMacResultSize)
+    {
+        goto cleanup;
+    }
+
+    // Update counters
+    cntr++;
+    pbCurr += cbMacResultSize;
+    cbResult -= cbMacResultSize;
+
+    while( cbResult > 0 )
+    {
+        // Calculate T(i) = HMAC-Hash(PRK, T(i-1) | info | 0xi)
+        pMacAlgorithm->initFunc( &state, &pExpandedKey->macKey );
+        pMacAlgorithm->appendFunc( &state, rbPartialResult, cbMacResultSize );
+        pMacAlgorithm->appendFunc( &state, pbInfo, cbInfo );
+        pMacAlgorithm->appendFunc( &state, &cntr, sizeof(cntr) );
+        pMacAlgorithm->resultFunc( &state, rbPartialResult );
+
+        // Store the result in the output buffer
+        memcpy(pbCurr, rbPartialResult, SYMCRYPT_MIN(cbResult, cbMacResultSize));
+        if (cbResult <= cbMacResultSize)
+        {
+            // Final (possibly truncated) block written; leaving here also
+            // keeps the one-byte counter from being incremented past 0xff.
+            goto cleanup;
+        }
+
+        // Update counters
+        cntr++;
+        pbCurr += cbMacResultSize;
+        cbResult -= cbMacResultSize;
+    }
+
+cleanup:
+    SymCryptWipeKnownSize(&rbPartialResult[0], sizeof(rbPartialResult));
+
+    return scError;
+}
+
+//
+// The full HKDF
+//
+// Runs SymCryptHkdfExpandKey followed by SymCryptHkdfDerive on a local
+// expanded key, which is wiped before returning. Returns the first error
+// reported by either step.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptHkdf(
+                            PCSYMCRYPT_MAC  macAlgorithm,
+    _In_reads_(cbIkm)       PCBYTE          pbIkm,
+                            SIZE_T          cbIkm,
+    _In_reads_opt_(cbSalt)  PCBYTE          pbSalt,
+                            SIZE_T          cbSalt,
+    _In_reads_opt_(cbInfo)  PCBYTE          pbInfo,
+                            SIZE_T          cbInfo,
+    _Out_writes_(cbResult)  PBYTE           pbResult,
+                            SIZE_T          cbResult)
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+    SYMCRYPT_HKDF_EXPANDED_KEY key;
+
+    // Create the expanded key
+    scError = SymCryptHkdfExpandKey(
+                    &key,
+                    macAlgorithm,
+                    pbIkm,
+                    cbIkm,
+                    pbSalt,
+                    cbSalt );
+    if (scError != SYMCRYPT_NO_ERROR)
+    {
+        goto cleanup;
+    }
+
+    // Derive the key
+    scError = SymCryptHkdfDerive(
+                    &key,
+                    pbInfo,
+                    cbInfo,
+                    pbResult,
+                    cbResult );
+    if (scError != SYMCRYPT_NO_ERROR)
+    {
+        goto cleanup;
+    }
+
+cleanup:
+    SymCryptWipeKnownSize(&key, sizeof(key));
+
+    return scError;
+}
diff --git a/libs/symcrypt/lib/hmac.c b/libs/symcrypt/lib/hmac.c
new file mode 100644
index 00000000000..766fd9eb95e
--- /dev/null
+++ b/libs/symcrypt/lib/hmac.c
@@ -0,0 +1,195 @@
+//
+// hmac.c
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+// + +#include "precomp.h" + +VOID +SYMCRYPT_CALL +SymCryptHmacStateCopy( + _In_ PCSYMCRYPT_HMAC_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_STATE pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + + PCSYMCRYPT_HASH pHash = pSrc->pKey->pHash; + + SymCryptHashStateCopy( pHash, &pSrc->hash, &pDst->hash ); + + if( pExpandedKey != NULL ) + { + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + pDst->pKey = pExpandedKey; + } + else + { + SYMCRYPT_CHECK_MAGIC( pSrc->pKey ); + pDst->pKey = pSrc->pKey; + } + SYMCRYPT_SET_MAGIC( pDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacKeyCopy( + _In_ PCSYMCRYPT_HMAC_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_EXPANDED_KEY pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + + // Copy innerState and outerState + SymCryptHashStateCopy(pSrc->pHash, &pSrc->innerState, &pDst->innerState ); + SymCryptHashStateCopy(pSrc->pHash, &pSrc->outerState, &pDst->outerState ); + + pDst->pHash = pSrc->pHash; + + SYMCRYPT_SET_MAGIC( pDst ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacExpandKey( + _In_ PCSYMCRYPT_HASH pHash, + _Out_ PSYMCRYPT_HMAC_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ) +{ + // This buffer has to be large enough to hold one input block + // and the result of the hash function. + // Using SHA3-224 input block size to satisfy those requirements. 
+ SYMCRYPT_ALIGN BYTE iblock[ SYMCRYPT_SHA3_224_INPUT_BLOCK_SIZE ]; + + SYMCRYPT_ASSERT( sizeof(iblock) >= pHash->inputBlockSize ); + SYMCRYPT_ASSERT( sizeof(iblock) >= pHash->resultSize ); + + // XorByteIntoBuffer function updates the buffer in multiples of 8-bytes + SYMCRYPT_ASSERT( pHash->inputBlockSize % 8 == 0); + + memset( iblock, 0, sizeof( iblock ) ); + + if( cbKey <= pHash->inputBlockSize ) + { + if( cbKey > 0 ) + { + memcpy( iblock, pbKey, cbKey ); + } + } + else + { + SymCryptHash( pHash, pbKey, cbKey, iblock, pHash->resultSize ); + } + + XorByteIntoBuffer( iblock, pHash->inputBlockSize / 8, HMAC_IPAD_BYTE ); + + // + // Initialize the inner and outer states in the expanded key + // + SymCryptHashInit( pHash, &pExpandedKey->innerState ); + SymCryptHashInit( pHash, &pExpandedKey->outerState ); + + // Update the inner state in the expanded key + SymCryptHashAppend( pHash, &pExpandedKey->innerState, iblock, pHash->inputBlockSize ); + + XorByteIntoBuffer( iblock, pHash->inputBlockSize / 8, HMAC_IPAD_BYTE ^ HMAC_OPAD_BYTE ); + + // Update the outer state in the expanded key + SymCryptHashAppend( pHash, &pExpandedKey->outerState, iblock, pHash->inputBlockSize ); + + SymCryptWipeKnownSize( iblock, sizeof( iblock ) ); + + // Save the hash function in the expanded key, it will be used in other + // generic HMAC function calls. 
+ pExpandedKey->pHash = pHash; + + SYMCRYPT_SET_MAGIC(pExpandedKey); + + return SYMCRYPT_NO_ERROR; +} + + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptHmacInit( + _Out_ PSYMCRYPT_HMAC_STATE pState, + _In_ PCSYMCRYPT_HMAC_EXPANDED_KEY pExpandedKey ) +{ + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + SymCryptHashStateCopy( pExpandedKey->pHash, &pExpandedKey->innerState, &pState->hash ); + + pState->pKey = pExpandedKey; + + SYMCRYPT_SET_MAGIC(pState); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacAppend( + _Inout_ PSYMCRYPT_HMAC_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SymCryptHashAppend( pState->pKey->pHash, &pState->hash, pbData, cbData ); +} + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptHmacResult( + _Inout_ PSYMCRYPT_HMAC_STATE pState, + _Out_writes_( pState->pKey->pHash->resultSize ) PBYTE pbResult ) +{ + BYTE innerRes[64]; + + PCSYMCRYPT_HASH pHash = pState->pKey->pHash; + + SYMCRYPT_ASSERT(sizeof(innerRes) >= pHash->resultSize); + + SYMCRYPT_CHECK_MAGIC( pState ); + + // + // We have to buffer the inner hash result. We can't put it directly in the + // hash state data buffer as the Result() function wipes that buffer before returning. + // + SymCryptHashResult( pHash, &pState->hash, innerRes, pHash->resultSize ); + + SYMCRYPT_CHECK_MAGIC( pState->pKey ) + + SymCryptHashStateCopy( pHash, &pState->pKey->outerState, &pState->hash ); + + SymCryptHashAppend( pHash, &pState->hash, innerRes, pHash->resultSize ); + + SymCryptHashResult( pHash, &pState->hash, pbResult, pHash->resultSize ); + + // + // The SymCryptHashResult already wipes the hash state. + // We only need to wipe our own buffer. + // + // We also set the key pointer to NULL. This is not for security; + // it creates a clear error when callers forget to call the Init routine + // when re-using a state. Rather than the wrong result, they will get + // a NULL pointer exception, and they will fix their code. 
+ // + + SymCryptWipeKnownSize( innerRes, sizeof( innerRes ) ); + pState->pKey = NULL; +} + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptHmac( + _In_ PCSYMCRYPT_HMAC_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( pExpandedKey->pHash->resultSize ) PBYTE pbResult ) +{ + SYMCRYPT_HMAC_STATE state; + + SymCryptHmacInit( &state, pExpandedKey ); + SymCryptHmacAppend( &state, pbData, cbData ); + SymCryptHmacResult( &state, pbResult ); +} diff --git a/libs/symcrypt/lib/hmac_pattern.c b/libs/symcrypt/lib/hmac_pattern.c new file mode 100644 index 00000000000..1d4ccf7c7b5 --- /dev/null +++ b/libs/symcrypt/lib/hmac_pattern.c @@ -0,0 +1,197 @@ +// +// hmac_pattern.c +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#if 0 +#pragma makedep header +#endif + +VOID +SYMCRYPT_CALL +SYMCRYPT_HmacXxxStateCopy( + _In_ PCSYMCRYPT_HMAC_XXX_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_XXX_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_XXX_STATE pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + + SYMCRYPT_XxxStateCopy( &pSrc->hash, &pDst->hash ); + + if( pExpandedKey != NULL ) + { + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + pDst->pKey = pExpandedKey; + } + else + { + SYMCRYPT_CHECK_MAGIC( pSrc->pKey ); + pDst->pKey = pSrc->pKey; + } + SYMCRYPT_SET_MAGIC( pDst ); +} + +VOID +SYMCRYPT_CALL +SYMCRYPT_HmacXxxKeyCopy( _In_ PCSYMCRYPT_HMAC_XXX_EXPANDED_KEY pSrc, _Out_ PSYMCRYPT_HMAC_XXX_EXPANDED_KEY pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + *pDst = *pSrc; + SYMCRYPT_SET_MAGIC( pDst ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SYMCRYPT_HmacXxxExpandKey( + _Out_ PSYMCRYPT_HMAC_XXX_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ) +{ + SYMCRYPT_XXX_STATE hashState; + SYMCRYPT_ALIGN BYTE iblock[ SYMCRYPT_XXX_INPUT_BLOCK_SIZE ]; // One input block for the hash function + SIZE_T tmp; + + SYMCRYPT_SET_MAGIC( pExpandedKey ); + + // + // Initialize our hash state and our input block + // We wipe the 
whole block & then copy the key into it. This is often faster + // as the compiler can optimize the wipe because it knows the size at compile time. + // + SYMCRYPT_XxxInit( &hashState ); + memset( iblock, 0, sizeof( iblock ) ); + + if( cbKey <= sizeof( iblock ) ) + { + if( cbKey > 0 ) + { + memcpy( iblock, pbKey, cbKey ); + } + } else { + // + // We can use the existing MD5 state to hash the long key. + // The state is re-initialized by the SymCryptMd5Result() function. + // + SYMCRYPT_XxxAppend( &hashState, pbKey, cbKey ); + SYMCRYPT_XxxResult( &hashState, iblock ); + } + + XorByteIntoBuffer( iblock, sizeof( iblock )/8, HMAC_IPAD_BYTE ); + + // + // Copy the initial chaining state to both states in the expanded key + // + pExpandedKey->innerState = hashState.chain; + pExpandedKey->outerState = hashState.chain; + + // + // Update the state in the expanded key directly + // + SYMCRYPT_XxxAppendBlocks( &pExpandedKey->innerState, iblock, sizeof( iblock ), &tmp ); + + XorByteIntoBuffer( iblock, sizeof( iblock )/8, HMAC_IPAD_BYTE ^ HMAC_OPAD_BYTE ); + + SYMCRYPT_XxxAppendBlocks( &pExpandedKey->outerState, iblock, sizeof( iblock ), &tmp ); + + SymCryptWipeKnownSize( iblock, sizeof( iblock ) ); + SymCryptWipeKnownSize( &hashState, sizeof( hashState ) ); + + return SYMCRYPT_NO_ERROR; +} + + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SYMCRYPT_HmacXxxInit( + _Out_ PSYMCRYPT_HMAC_XXX_STATE pState, + _In_ PCSYMCRYPT_HMAC_XXX_EXPANDED_KEY pExpandedKey) +{ + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + SYMCRYPT_SET_MAGIC( pState ); + + // + // We don't call SymCryptXxxInit on the hash sub-state; + // instead we directly initialize its fields. 
+ // + SYMCRYPT_SET_MAGIC( &pState->hash ); + pState->hash.chain = pExpandedKey->innerState; + SET_DATALENGTH( pState->hash, SYMCRYPT_XXX_INPUT_BLOCK_SIZE ); + pState->hash.bytesInBuffer = 0; + pState->pKey = pExpandedKey; +} + +VOID +SYMCRYPT_CALL +SYMCRYPT_HmacXxxAppend( + _Inout_ PSYMCRYPT_HMAC_XXX_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SYMCRYPT_XxxAppend( &pState->hash, pbData, cbData ); +} + +C_ASSERT( SYMCRYPT_XXX_RESULT_SIZE == SYMCRYPT_HMAC_XXX_RESULT_SIZE ); + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SYMCRYPT_HmacXxxResult( + _Inout_ PSYMCRYPT_HMAC_XXX_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_XXX_RESULT_SIZE ) PBYTE pbResult ) +{ + BYTE innerRes[SYMCRYPT_XXX_RESULT_SIZE]; + + SYMCRYPT_CHECK_MAGIC( pState ); + + // + // We have to buffer the inner hash result. We can't put it directly in the + // hash state data buffer as the Result() function wipes that buffer before returning. + // + + SYMCRYPT_XxxResult( &pState->hash, innerRes ); + + SYMCRYPT_CHECK_MAGIC( pState->pKey ) + + pState->hash.chain = pState->pKey->outerState; + + // + // We put the data directly in the buffer, rather than call the Append function. + // + memcpy( &pState->hash.buffer, innerRes, sizeof( innerRes ) ); + SET_DATALENGTH( pState->hash, SYMCRYPT_XXX_INPUT_BLOCK_SIZE + SYMCRYPT_XXX_RESULT_SIZE ); + pState->hash.bytesInBuffer = SYMCRYPT_XXX_RESULT_SIZE; + + SYMCRYPT_XxxResult( &pState->hash, pbResult ); + + // + // The SymCryptXxxResult already wipes the hash state. + // We only need to wipe our own buffer. + // + // We also set the key pointer to NULL. This is not for security; + // it creates a clear error when callers forget to call the Init routine + // when re-using a state. Rather than the wrong result, they will get + // a NULL pointer exception, and they will fix their code. 
+ // + + SymCryptWipe( innerRes, sizeof( innerRes ) ); + pState->pKey = NULL; +} + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SYMCRYPT_HmacXxx( + _In_ PCSYMCRYPT_HMAC_XXX_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_XXX_RESULT_SIZE ) PBYTE pbResult ) +{ + SYMCRYPT_HMAC_XXX_STATE state; + + SYMCRYPT_HmacXxxInit( &state, pExpandedKey ); + SYMCRYPT_HmacXxxAppend( &state, pbData, cbData ); + SYMCRYPT_HmacXxxResult( &state, pbResult ); + +} diff --git a/libs/symcrypt/lib/hmacmd5.c b/libs/symcrypt/lib/hmacmd5.c new file mode 100644 index 00000000000..03f5fe01aea --- /dev/null +++ b/libs/symcrypt/lib/hmacmd5.c @@ -0,0 +1,56 @@ +// +// HmacMd5.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#define ALG MD5 +#define Alg Md5 +#define SET_DATALENGTH( state, len ) {state.dataLengthL = len;} +#include "hmac_pattern.c" +#undef SET_DATALENGTH +#undef Alg +#undef ALG + +const SYMCRYPT_MAC SymCryptHmacMd5Algorithm_default = { + SymCryptHmacMd5ExpandKey, + SymCryptHmacMd5Init, + SymCryptHmacMd5Append, + SymCryptHmacMd5Result, + sizeof(SYMCRYPT_HMAC_MD5_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_MD5_STATE), + SYMCRYPT_HMAC_MD5_RESULT_SIZE, + &SymCryptMd5Algorithm, + 0, +}; + +const PCSYMCRYPT_MAC SymCryptHmacMd5Algorithm = &SymCryptHmacMd5Algorithm_default; + +static const BYTE hmacMd5Kat[16] = { + 0x77, 0x33, 0x69, 0x79, 0x9e, 0x54, 0xeb, 0x49, 0xff, 0x21, 0xe6, 0xf9, 0x63, 0xe5, 0xbb, 0x49, +}; + +VOID +SYMCRYPT_CALL +SymCryptHmacMd5Selftest(void) +{ + SYMCRYPT_HMAC_MD5_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_MD5_RESULT_SIZE]; + + SymCryptHmacMd5ExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptHmacMd5( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacMd5Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hmd5'); + } + + // + // Normally we would wipe the expanded key 
structure here, + // but as this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/hmacsha1.c b/libs/symcrypt/lib/hmacsha1.c new file mode 100644 index 00000000000..e8a8de0c4af --- /dev/null +++ b/libs/symcrypt/lib/hmacsha1.c @@ -0,0 +1,65 @@ +// +// HmacSha1.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// This implementation of HMAC uses extensive knowledge of the internal workings of the +// SHA1 implementation and uses internal routines. +// This reduces the overhead per HMAC computation by up to 20%, which is significant +// enough to take on the added complexity. +// + +#define ALG SHA1 +#define Alg Sha1 +#define SET_DATALENGTH( state, len ) {state.dataLengthL = len;} +#include "hmac_pattern.c" +#undef SET_DATALENGTH +#undef Alg +#undef ALG + +const SYMCRYPT_MAC SymCryptHmacSha1Algorithm_default = { + SymCryptHmacSha1ExpandKey, + SymCryptHmacSha1Init, + SymCryptHmacSha1Append, + SymCryptHmacSha1Result, + sizeof(SYMCRYPT_HMAC_SHA1_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA1_STATE), + SYMCRYPT_HMAC_SHA1_RESULT_SIZE, + &SymCryptSha1Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA1_EXPANDED_KEY, outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha1Algorithm = &SymCryptHmacSha1Algorithm_default; + +static const BYTE hmacSha1Kat[20] = { + 0x2a, 0x29, 0x85, 0x40, 0x23, 0xba, 0x2e, 0xf1, + 0x49, 0x0f, 0x8c, 0xd8, 0x97, 0xa8, 0xcc, 0x6b, + 0x55, 0x7b, 0x2a, 0x12, +}; + +VOID +SYMCRYPT_CALL +SymCryptHmacSha1Selftest(void) +{ + SYMCRYPT_HMAC_SHA1_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA1_RESULT_SIZE]; + + SymCryptHmacSha1ExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptHmacSha1( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacSha1Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hSh1' ); + } + + // + // Normally we would wipe the expanded key structure here, + // 
but as this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/hmacsha224.c b/libs/symcrypt/lib/hmacsha224.c new file mode 100644 index 00000000000..fcfc8d85cdf --- /dev/null +++ b/libs/symcrypt/lib/hmacsha224.c @@ -0,0 +1,62 @@ +// +// HmacSha224.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#define SymCryptSha224AppendBlocks SymCryptSha256AppendBlocks + +#define ALG SHA224 +#define Alg Sha224 +#define SET_DATALENGTH( state, len ) {state.dataLengthL = len;} +#include "hmac_pattern.c" +#undef SET_DATALENGTH +#undef Alg +#undef ALG + +const SYMCRYPT_MAC SymCryptHmacSha224Algorithm_default = { + SymCryptHmacSha224ExpandKey, + SymCryptHmacSha224Init, + SymCryptHmacSha224Append, + SymCryptHmacSha224Result, + sizeof(SYMCRYPT_HMAC_SHA224_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA224_STATE), + SYMCRYPT_HMAC_SHA224_RESULT_SIZE, + &SymCryptSha224Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA224_EXPANDED_KEY, outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha224Algorithm = &SymCryptHmacSha224Algorithm_default; + +static const BYTE hmacSha224Kat[28] = { + 0x3e, 0x1c, 0x48, 0x2f, 0x66, 0x49, 0x67, 0xa9, + 0xad, 0x4f, 0x76, 0x52, 0x36, 0xf8, 0x5a, 0x1f, + 0x63, 0x5b, 0x34, 0xe9, 0x35, 0x71, 0x62, 0x35, + 0xa2, 0x9e, 0x61, 0xb1 +}; + + +VOID +SYMCRYPT_CALL +SymCryptHmacSha224Selftest(void) +{ + SYMCRYPT_HMAC_SHA224_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA224_RESULT_SIZE]; + + SymCryptHmacSha224ExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptHmacSha224( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacSha224Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh4' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as this is a selftest with known data this is not needed. 
+ // +} diff --git a/libs/symcrypt/lib/hmacsha256.c b/libs/symcrypt/lib/hmacsha256.c new file mode 100644 index 00000000000..c2641ef39aa --- /dev/null +++ b/libs/symcrypt/lib/hmacsha256.c @@ -0,0 +1,60 @@ +// +// HmacSha256.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#define ALG SHA256 +#define Alg Sha256 +#define SET_DATALENGTH( state, len ) {state.dataLengthL = len;} +#include "hmac_pattern.c" +#undef SET_DATALENGTH +#undef Alg +#undef ALG + +const SYMCRYPT_MAC SymCryptHmacSha256Algorithm_default = { + SymCryptHmacSha256ExpandKey, + SymCryptHmacSha256Init, + SymCryptHmacSha256Append, + SymCryptHmacSha256Result, + sizeof(SYMCRYPT_HMAC_SHA256_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA256_STATE), + SYMCRYPT_HMAC_SHA256_RESULT_SIZE, + &SymCryptSha256Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA256_EXPANDED_KEY, outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha256Algorithm = &SymCryptHmacSha256Algorithm_default; + +static const BYTE hmacSha256Kat[32] = { + 0xd6, 0x01, 0xcc, 0x17, 0x75, 0x59, 0xb0, 0x24, + 0x84, 0x59, 0x78, 0x7f, 0x7e, 0x80, 0x4e, 0xd7, + 0xf2, 0x76, 0x89, 0xb5, 0x99, 0x5c, 0x59, 0xb6, + 0x61, 0x80, 0x2d, 0x96, 0x82, 0xfd, 0xf8, 0xd2, +}; + + +VOID +SYMCRYPT_CALL +SymCryptHmacSha256Selftest(void) +{ + SYMCRYPT_HMAC_SHA256_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA256_RESULT_SIZE]; + + SymCryptHmacSha256ExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptHmacSha256( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacSha256Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh2' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as this is a selftest with known data this is not needed. 
+ // +} diff --git a/libs/symcrypt/lib/hmacsha384.c b/libs/symcrypt/lib/hmacsha384.c new file mode 100644 index 00000000000..36042092edf --- /dev/null +++ b/libs/symcrypt/lib/hmacsha384.c @@ -0,0 +1,59 @@ +// +// HmacSha512.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#define SymCryptSha384AppendBlocks SymCryptSha512AppendBlocks + +#define ALG SHA384 +#define Alg Sha384 +#define SET_DATALENGTH( state, len ) {state.dataLengthL = len; state.dataLengthH = 0;} +#include "hmac_pattern.c" +#undef SET_DATALENGTH +#undef Alg +#undef ALG + +const SYMCRYPT_MAC SymCryptHmacSha384Algorithm_default = { + SymCryptHmacSha384ExpandKey, + SymCryptHmacSha384Init, + SymCryptHmacSha384Append, + SymCryptHmacSha384Result, + sizeof(SYMCRYPT_HMAC_SHA384_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA384_STATE), + SYMCRYPT_HMAC_SHA384_RESULT_SIZE, + &SymCryptSha384Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA384_EXPANDED_KEY, outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha384Algorithm = &SymCryptHmacSha384Algorithm_default; + +static const BYTE hmacSha384Kat[48] = { + 0x67, 0xdb, 0x9d, 0x4d, 0x66, 0xed, 0xf2, 0xe7, 0x2b, 0x88, 0xb8, 0x50, 0x55, 0x68, 0xa0, 0x00, + 0xa9, 0x83, 0x2b, 0xa3, 0x5e, 0x4f, 0xde, 0xcf, 0xe5, 0x38, 0x9a, 0x5d, 0x92, 0x79, 0x81, 0x53, + 0x6d, 0xdb, 0x94, 0xc0, 0xf6, 0xc0, 0xbd, 0x94, 0xc4, 0x18, 0x96, 0x4b, 0xbe, 0x4b, 0x6c, 0xf2, +}; + +VOID +SYMCRYPT_CALL +SymCryptHmacSha384Selftest(void) +{ + SYMCRYPT_HMAC_SHA384_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA384_RESULT_SIZE]; + + SymCryptHmacSha384ExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptHmacSha384( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + if( memcmp( res, hmacSha384Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh3' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as this is a selftest with known data this is not 
needed. + // +} diff --git a/libs/symcrypt/lib/hmacsha3_224.c b/libs/symcrypt/lib/hmacsha3_224.c new file mode 100644 index 00000000000..8f3b7626e43 --- /dev/null +++ b/libs/symcrypt/lib/hmacsha3_224.c @@ -0,0 +1,122 @@ +// +// HmacSha3_224.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha3_224ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey) +{ + return SymCryptHmacExpandKey(SymCryptSha3_224Algorithm, &pExpandedKey->generic, pbKey, cbKey); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pDst) +{ + SymCryptHmacKeyCopy(&pSrc->generic, &pDst->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224( + _In_ PCSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA3_224_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHmac(&pExpandedKey->generic, pbData, cbData, pbResult); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_224_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA3_224_STATE pDst ) +{ + SymCryptHmacStateCopy(&pSrc->generic, pExpandedKey == NULL ? 
NULL : &pExpandedKey->generic, &pDst->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224Init( + _Out_ PSYMCRYPT_HMAC_SHA3_224_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY pExpandedKey) +{ + SymCryptHmacInit(&pState->generic, &pExpandedKey->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224Append( + _Inout_ PSYMCRYPT_HMAC_SHA3_224_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SymCryptHmacAppend(&pState->generic, pbData, cbData); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224Result( + _Inout_ PSYMCRYPT_HMAC_SHA3_224_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA3_224_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHmacResult(&pState->generic, pbResult); +} + + +const SYMCRYPT_MAC SymCryptHmacSha3_224Algorithm_default = { + SymCryptHmacSha3_224ExpandKey, + SymCryptHmacSha3_224Init, + SymCryptHmacSha3_224Append, + SymCryptHmacSha3_224Result, + sizeof(SYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA3_224_STATE), + SYMCRYPT_HMAC_SHA3_224_RESULT_SIZE, + &SymCryptSha3_224Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY, generic.outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha3_224Algorithm = &SymCryptHmacSha3_224Algorithm_default; + + +static const BYTE hmacSha3_224Kat[28] = +{ + 0x10, 0x90, 0xac, 0xa1, 0xd5, 0xad, 0xc4, 0x12, + 0xf5, 0xe7, 0xb4, 0xdf, 0xd2, 0x87, 0x09, 0xdd, + 0x24, 0x82, 0xc0, 0x4a, 0x5e, 0x9a, 0x3b, 0xf0, + 0xc3, 0x35, 0x7e, 0x12 +}; + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_224Selftest(void) +{ + SYMCRYPT_HMAC_SHA3_224_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA3_224_RESULT_SIZE]; + + SymCryptHmacSha3_224ExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptHmacSha3_224( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacSha3_224Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh3' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as 
this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/hmacsha3_256.c b/libs/symcrypt/lib/hmacsha3_256.c new file mode 100644 index 00000000000..8f145d398bf --- /dev/null +++ b/libs/symcrypt/lib/hmacsha3_256.c @@ -0,0 +1,122 @@ +// +// HmacSha3_256.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha3_256ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey) +{ + return SymCryptHmacExpandKey(SymCryptSha3_256Algorithm, &pExpandedKey->generic, pbKey, cbKey); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pDst) +{ + SymCryptHmacKeyCopy(&pSrc->generic, &pDst->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256( + _In_ PCSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA3_256_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHmac(&pExpandedKey->generic, pbData, cbData, pbResult); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_256_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA3_256_STATE pDst ) +{ + SymCryptHmacStateCopy(&pSrc->generic, pExpandedKey == NULL ? 
NULL : &pExpandedKey->generic, &pDst->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256Init( + _Out_ PSYMCRYPT_HMAC_SHA3_256_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY pExpandedKey) +{ + SymCryptHmacInit(&pState->generic, &pExpandedKey->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256Append( + _Inout_ PSYMCRYPT_HMAC_SHA3_256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SymCryptHmacAppend(&pState->generic, pbData, cbData); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256Result( + _Inout_ PSYMCRYPT_HMAC_SHA3_256_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA3_256_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHmacResult(&pState->generic, pbResult); +} + + +const SYMCRYPT_MAC SymCryptHmacSha3_256Algorithm_default = { + SymCryptHmacSha3_256ExpandKey, + SymCryptHmacSha3_256Init, + SymCryptHmacSha3_256Append, + SymCryptHmacSha3_256Result, + sizeof(SYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA3_256_STATE), + SYMCRYPT_HMAC_SHA3_256_RESULT_SIZE, + &SymCryptSha3_256Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY, generic.outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha3_256Algorithm = &SymCryptHmacSha3_256Algorithm_default; + + +static const BYTE hmacSha3_256Kat[32] = +{ + 0x18, 0xe8, 0x2e, 0xa4, 0x5a, 0x94, 0x07, 0xcc, + 0xb7, 0x87, 0x29, 0x16, 0x80, 0x99, 0xd6, 0xc6, + 0x73, 0x1b, 0x56, 0x2e, 0x0d, 0x16, 0x67, 0x5a, + 0x1f, 0xe2, 0xe3, 0xd6, 0x81, 0x56, 0x52, 0x77 +}; + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_256Selftest(void) +{ + SYMCRYPT_HMAC_SHA3_256_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA3_256_RESULT_SIZE]; + + SymCryptHmacSha3_256ExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptHmacSha3_256( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacSha3_256Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh3' ); + } + + // + // Normally we would wipe the expanded key 
structure here, + // but as this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/hmacsha3_384.c b/libs/symcrypt/lib/hmacsha3_384.c new file mode 100644 index 00000000000..14eef9d09d7 --- /dev/null +++ b/libs/symcrypt/lib/hmacsha3_384.c @@ -0,0 +1,124 @@ +// +// HmacSha3_384.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha3_384ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey) +{ + return SymCryptHmacExpandKey(SymCryptSha3_384Algorithm, &pExpandedKey->generic, pbKey, cbKey); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pDst) +{ + SymCryptHmacKeyCopy(&pSrc->generic, &pDst->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384( + _In_ PCSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA3_384_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHmac(&pExpandedKey->generic, pbData, cbData, pbResult); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_384_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA3_384_STATE pDst ) +{ + SymCryptHmacStateCopy(&pSrc->generic, pExpandedKey == NULL ? 
NULL : &pExpandedKey->generic, &pDst->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384Init( + _Out_ PSYMCRYPT_HMAC_SHA3_384_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY pExpandedKey) +{ + SymCryptHmacInit(&pState->generic, &pExpandedKey->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384Append( + _Inout_ PSYMCRYPT_HMAC_SHA3_384_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SymCryptHmacAppend(&pState->generic, pbData, cbData); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384Result( + _Inout_ PSYMCRYPT_HMAC_SHA3_384_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA3_384_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHmacResult(&pState->generic, pbResult); +} + + +const SYMCRYPT_MAC SymCryptHmacSha3_384Algorithm_default = { + SymCryptHmacSha3_384ExpandKey, + SymCryptHmacSha3_384Init, + SymCryptHmacSha3_384Append, + SymCryptHmacSha3_384Result, + sizeof(SYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA3_384_STATE), + SYMCRYPT_HMAC_SHA3_384_RESULT_SIZE, + &SymCryptSha3_384Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY, generic.outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha3_384Algorithm = &SymCryptHmacSha3_384Algorithm_default; + + +static const BYTE hmacSha3_384Kat[48] = +{ + 0x8f, 0xc4, 0x8a, 0x84, 0xb5, 0xa7, 0xa1, 0x36, + 0x3c, 0x3b, 0x4b, 0x21, 0x3c, 0xfb, 0x65, 0x36, + 0xa6, 0x2b, 0xa3, 0x4c, 0x12, 0x33, 0xa1, 0x27, + 0xbc, 0xfc, 0xb2, 0xd7, 0xae, 0xaf, 0x30, 0x6b, + 0xc9, 0xe6, 0x90, 0xfd, 0xf1, 0xfa, 0x12, 0x61, + 0xa4, 0x7e, 0xb2, 0x27, 0x1a, 0xeb, 0xf1, 0x34 +}; + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_384Selftest(void) +{ + SYMCRYPT_HMAC_SHA3_384_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA3_384_RESULT_SIZE]; + + SymCryptHmacSha3_384ExpandKey( &xKey, SymCryptTestKey32, 24 ); + SymCryptHmacSha3_384( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacSha3_384Kat, sizeof( res ) 
) != 0 ) + { + SymCryptFatal( 'hsh3' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/hmacsha3_512.c b/libs/symcrypt/lib/hmacsha3_512.c new file mode 100644 index 00000000000..5a576e6ae24 --- /dev/null +++ b/libs/symcrypt/lib/hmacsha3_512.c @@ -0,0 +1,126 @@ +// +// HmacSha3_512.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHmacSha3_512ExpandKey( + _Out_ PSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbKey) PCBYTE pbKey, + SIZE_T cbKey) +{ + return SymCryptHmacExpandKey(SymCryptSha3_512Algorithm, &pExpandedKey->generic, pbKey, cbKey); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512KeyCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pDst) +{ + SymCryptHmacKeyCopy(&pSrc->generic, &pDst->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512( + _In_ PCSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_HMAC_SHA3_512_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHmac(&pExpandedKey->generic, pbData, cbData, pbResult); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512StateCopy( + _In_ PCSYMCRYPT_HMAC_SHA3_512_STATE pSrc, + _In_opt_ PCSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pExpandedKey, + _Out_ PSYMCRYPT_HMAC_SHA3_512_STATE pDst ) +{ + SymCryptHmacStateCopy(&pSrc->generic, pExpandedKey == NULL ? 
NULL : &pExpandedKey->generic, &pDst->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512Init( + _Out_ PSYMCRYPT_HMAC_SHA3_512_STATE pState, + _In_ PCSYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY pExpandedKey) +{ + SymCryptHmacInit(&pState->generic, &pExpandedKey->generic); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512Append( + _Inout_ PSYMCRYPT_HMAC_SHA3_512_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SymCryptHmacAppend(&pState->generic, pbData, cbData); +} + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512Result( + _Inout_ PSYMCRYPT_HMAC_SHA3_512_STATE pState, + _Out_writes_( SYMCRYPT_HMAC_SHA3_512_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHmacResult(&pState->generic, pbResult); +} + + +const SYMCRYPT_MAC SymCryptHmacSha3_512Algorithm_default = { + SymCryptHmacSha3_512ExpandKey, + SymCryptHmacSha3_512Init, + SymCryptHmacSha3_512Append, + SymCryptHmacSha3_512Result, + sizeof(SYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA3_512_STATE), + SYMCRYPT_HMAC_SHA3_512_RESULT_SIZE, + &SymCryptSha3_512Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY, generic.outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha3_512Algorithm = &SymCryptHmacSha3_512Algorithm_default; + + +static const BYTE hmacSha3_512Kat[64] = +{ + 0x83, 0x3b, 0x31, 0xe7, 0x77, 0xd6, 0xb3, 0x3d, + 0x75, 0x23, 0xa5, 0x79, 0xcc, 0x3b, 0xeb, 0x27, + 0x6f, 0xd6, 0x52, 0x57, 0x54, 0xc4, 0xc5, 0x4b, + 0x2d, 0x5a, 0x34, 0x7d, 0x36, 0x24, 0x07, 0x91, + 0x7a, 0x3c, 0x62, 0x6e, 0x7e, 0xdb, 0x8e, 0x49, + 0x3b, 0x42, 0xc8, 0xe5, 0xa6, 0x96, 0xd5, 0xe6, + 0x6b, 0xa7, 0xad, 0x20, 0x00, 0xeb, 0x6c, 0xff, + 0x76, 0xcb, 0x1e, 0xc0, 0x30, 0x13, 0x0e, 0x81 +}; + +VOID +SYMCRYPT_CALL +SymCryptHmacSha3_512Selftest(void) +{ + SYMCRYPT_HMAC_SHA3_512_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA3_512_RESULT_SIZE]; + + SymCryptHmacSha3_512ExpandKey( &xKey, SymCryptTestKey32, 32 ); + SymCryptHmacSha3_512( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); 
+ + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacSha3_512Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh3' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/hmacsha512.c b/libs/symcrypt/lib/hmacsha512.c new file mode 100644 index 00000000000..c91dfab7e1a --- /dev/null +++ b/libs/symcrypt/lib/hmacsha512.c @@ -0,0 +1,59 @@ +// +// HmacSha512.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#define ALG SHA512 +#define Alg Sha512 +#define SET_DATALENGTH( state, len ) {state.dataLengthL = len; state.dataLengthH = 0;} +#include "hmac_pattern.c" +#undef SET_DATALENGTH +#undef Alg +#undef ALG + +const SYMCRYPT_MAC SymCryptHmacSha512Algorithm_default = { + SymCryptHmacSha512ExpandKey, + SymCryptHmacSha512Init, + SymCryptHmacSha512Append, + SymCryptHmacSha512Result, + sizeof(SYMCRYPT_HMAC_SHA512_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA512_STATE), + SYMCRYPT_HMAC_SHA512_RESULT_SIZE, + &SymCryptSha512Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA512_EXPANDED_KEY, outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha512Algorithm = &SymCryptHmacSha512Algorithm_default; + +static const BYTE hmacSha512Kat[64] = { + 0x07, 0x64, 0xa6, 0x58, 0xeb, 0x3e, 0x2f, 0xb0, 0x2c, 0x06, 0x72, 0x93, 0xcd, 0xaa, 0x3c, 0x05, + 0x28, 0x73, 0x15, 0xf2, 0xd3, 0xb4, 0x5a, 0x28, 0x10, 0x20, 0x1e, 0x26, 0xc3, 0x89, 0x35, 0x48, + 0xe9, 0xea, 0xca, 0x72, 0xf0, 0x2e, 0x04, 0x19, 0x20, 0x31, 0x71, 0x68, 0xb5, 0x7a, 0x86, 0x40, + 0x29, 0x1b, 0x3b, 0xb7, 0xaa, 0x4a, 0x5f, 0xaf, 0x80, 0x26, 0xb4, 0xad, 0x23, 0x5a, 0xc4, 0x25, +}; + + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512Selftest(void) +{ + SYMCRYPT_HMAC_SHA512_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA512_RESULT_SIZE]; + + SymCryptHmacSha512ExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptHmacSha512( &xKey, 
SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + if( memcmp( res, hmacSha512Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh5' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/hmacsha512_224.c b/libs/symcrypt/lib/hmacsha512_224.c new file mode 100644 index 00000000000..a36556485dc --- /dev/null +++ b/libs/symcrypt/lib/hmacsha512_224.c @@ -0,0 +1,62 @@ +// +// HmacSha512_224.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#define SymCryptSha512_224AppendBlocks SymCryptSha512AppendBlocks + +#define ALG SHA512_224 +#define Alg Sha512_224 +#define SET_DATALENGTH( state, len ) {state.dataLengthL = len; state.dataLengthH = 0;} +#include "hmac_pattern.c" +#undef SET_DATALENGTH +#undef Alg +#undef ALG + +const SYMCRYPT_MAC SymCryptHmacSha512_224Algorithm_default = { + SymCryptHmacSha512_224ExpandKey, + SymCryptHmacSha512_224Init, + SymCryptHmacSha512_224Append, + SymCryptHmacSha512_224Result, + sizeof(SYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA512_224_STATE), + SYMCRYPT_HMAC_SHA512_224_RESULT_SIZE, + &SymCryptSha512_224Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY, outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha512_224Algorithm = &SymCryptHmacSha512_224Algorithm_default; + +static const BYTE hmacSha512_224Kat[28] = { + 0x62, 0xc9, 0x59, 0xc7, 0x5b, 0x3c, 0xb2, 0xaf, + 0x95, 0xf5, 0x59, 0x73, 0x2c, 0x46, 0x1d, 0x72, + 0x06, 0x9e, 0xf9, 0x52, 0x9a, 0x8d, 0x84, 0x1a, + 0x73, 0x97, 0xa6, 0x9c +}; + + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_224Selftest(void) +{ + SYMCRYPT_HMAC_SHA512_224_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA512_224_RESULT_SIZE]; + + SymCryptHmacSha512_224ExpandKey( &xKey, SymCryptTestKey32, 16 ); + SymCryptHmacSha512_224( &xKey, 
SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacSha512_224Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh4' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/hmacsha512_256.c b/libs/symcrypt/lib/hmacsha512_256.c new file mode 100644 index 00000000000..a7a49413cda --- /dev/null +++ b/libs/symcrypt/lib/hmacsha512_256.c @@ -0,0 +1,62 @@ +// +// HmacSha512_256.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#define SymCryptSha512_256AppendBlocks SymCryptSha512AppendBlocks + +#define ALG SHA512_256 +#define Alg Sha512_256 +#define SET_DATALENGTH( state, len ) {state.dataLengthL = len; state.dataLengthH = 0;} +#include "hmac_pattern.c" +#undef SET_DATALENGTH +#undef Alg +#undef ALG + +const SYMCRYPT_MAC SymCryptHmacSha512_256Algorithm_default = { + SymCryptHmacSha512_256ExpandKey, + SymCryptHmacSha512_256Init, + SymCryptHmacSha512_256Append, + SymCryptHmacSha512_256Result, + sizeof(SYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY), + sizeof(SYMCRYPT_HMAC_SHA512_256_STATE), + SYMCRYPT_HMAC_SHA512_256_RESULT_SIZE, + &SymCryptSha512_256Algorithm, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY, outerState ), +}; + +const PCSYMCRYPT_MAC SymCryptHmacSha512_256Algorithm = &SymCryptHmacSha512_256Algorithm_default; + +static const BYTE hmacSha512_256Kat[32] = { + 0x79, 0x44, 0xb9, 0x97, 0xc0, 0xaa, 0xf7, 0x11, + 0xdd, 0xb3, 0x78, 0x60, 0x68, 0xdb, 0x2b, 0xa1, + 0x40, 0x80, 0x4f, 0xdc, 0xb7, 0x02, 0x7b, 0x6a, + 0xe9, 0x9f, 0x5a, 0x38, 0xc8, 0x28, 0x67, 0x4c +}; + + +VOID +SYMCRYPT_CALL +SymCryptHmacSha512_256Selftest(void) +{ + SYMCRYPT_HMAC_SHA512_256_EXPANDED_KEY xKey; + BYTE res[SYMCRYPT_HMAC_SHA512_256_RESULT_SIZE]; + + SymCryptHmacSha512_256ExpandKey( &xKey, SymCryptTestKey32, 16 ); + 
SymCryptHmacSha512_256( &xKey, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, hmacSha512_256Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'hsh4' ); + } + + // + // Normally we would wipe the expanded key structure here, + // but as this is a selftest with known data this is not needed. + // +} diff --git a/libs/symcrypt/lib/kmac.c b/libs/symcrypt/lib/kmac.c new file mode 100644 index 00000000000..ae82af143c4 --- /dev/null +++ b/libs/symcrypt/lib/kmac.c @@ -0,0 +1,123 @@ +// +// kmac.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + + +// +// KMAC128 +// +#define Alg Kmac128 +#define ALG KMAC128 +#define SYMCRYPT_CSHAKEXXX_INIT SymCryptCShake128Init +#define SYMCRYPT_CSHAKEXXX_STATE SYMCRYPT_CSHAKE128_STATE +#define SYMCRYPT_KMACXXX_RESULT_SIZE SYMCRYPT_KMAC128_RESULT_SIZE +#include "kmac_pattern.c" +#undef SYMCRYPT_KMACXXX_RESULT_SIZE +#undef SYMCRYPT_CSHAKEXXX_STATE +#undef SYMCRYPT_CSHAKEXXX_INIT +#undef ALG +#undef Alg + +// MAC interface +const SYMCRYPT_MAC SymCryptKmac128Algorithm_Default = { + SymCryptKmac128ExpandKey, + SymCryptKmac128Init, + SymCryptKmac128Append, + SymCryptKmac128Result, + sizeof(SYMCRYPT_KMAC128_EXPANDED_KEY), + sizeof(SYMCRYPT_KMAC128_STATE), + SYMCRYPT_KMAC128_RESULT_SIZE, + NULL, // ppHashAlgorithm + 0, // outerChainingStateOffset +}; + +const PCSYMCRYPT_MAC SymCryptKmac128Algorithm = &SymCryptKmac128Algorithm_Default; + +static const BYTE kmac128KATAnswer[SYMCRYPT_KMAC128_RESULT_SIZE] = { + 0xea, 0xe9, 0xde, 0xd3, 0xee, 0x2f, 0x34, 0x8a, + 0xd6, 0xd2, 0xcb, 0x70, 0x4b, 0xba, 0xd4, 0x47, + 0x15, 0x32, 0x46, 0x82, 0x8e, 0x41, 0x3a, 0xf5, + 0xf5, 0x62, 0x96, 0x1a, 0xf7, 0x67, 0x48, 0xc1 +}; + +VOID +SYMCRYPT_CALL +SymCryptKmac128Selftest(void) +{ + BYTE result[SYMCRYPT_KMAC128_RESULT_SIZE]; + static const unsigned char Sstr[] = { 'S' }; + SYMCRYPT_KMAC128_EXPANDED_KEY expandedKey; + + 
SymCryptKmac128ExpandKeyEx(&expandedKey, SymCryptTestKey32, 16, Sstr, sizeof(Sstr)); + + SymCryptKmac128(&expandedKey, SymCryptTestMsg16, sizeof(SymCryptTestMsg16), result); + + SymCryptInjectError(result, sizeof(result)); + + if (memcmp(result, kmac128KATAnswer, sizeof(result)) != 0) + { + SymCryptFatal('kmac'); + } +} + + +// +// KMAC256 +// +#define Alg Kmac256 +#define ALG KMAC256 +#define SYMCRYPT_CSHAKEXXX_INIT SymCryptCShake256Init +#define SYMCRYPT_CSHAKEXXX_STATE SYMCRYPT_CSHAKE256_STATE +#define SYMCRYPT_KMACXXX_RESULT_SIZE SYMCRYPT_KMAC256_RESULT_SIZE +#include "kmac_pattern.c" +#undef SYMCRYPT_KMACXXX_RESULT_SIZE +#undef SYMCRYPT_CSHAKEXXX_STATE +#undef SYMCRYPT_CSHAKEXXX_INIT +#undef ALG +#undef Alg + +// MAC interface +const SYMCRYPT_MAC SymCryptKmac256Algorithm_Default = { + SymCryptKmac256ExpandKey, + SymCryptKmac256Init, + SymCryptKmac256Append, + SymCryptKmac256Result, + sizeof(SYMCRYPT_KMAC256_EXPANDED_KEY), + sizeof(SYMCRYPT_KMAC256_STATE), + SYMCRYPT_KMAC256_RESULT_SIZE, + NULL, // ppHashAlgorithm + 0, // outerChainingStateOffset +}; + +const PCSYMCRYPT_MAC SymCryptKmac256Algorithm = &SymCryptKmac256Algorithm_Default; + +static const BYTE kmac256KATAnswer[SYMCRYPT_KMAC256_RESULT_SIZE] = { + 0xa9, 0x1d, 0x09, 0x00, 0x71, 0x0c, 0x63, 0xc5, 0x0f, 0xb6, 0x4d, 0xfa, 0xd8, 0x75, 0x4d, 0x78, + 0x2d, 0xc0, 0x82, 0x4b, 0x87, 0x97, 0xda, 0xf2, 0x36, 0xde, 0xe9, 0x35, 0x69, 0x2e, 0x50, 0x81, + 0x0a, 0xea, 0x3b, 0x05, 0xaf, 0x1b, 0x82, 0x3b, 0xc8, 0xa1, 0x9e, 0xe9, 0x9c, 0x5f, 0xd5, 0x5a, + 0x20, 0x92, 0x89, 0x46, 0xa4, 0xe4, 0x1a, 0xdd, 0x3d, 0xb6, 0x47, 0x4d, 0xf2, 0xa5, 0xfc, 0x73 +}; + +VOID +SYMCRYPT_CALL +SymCryptKmac256Selftest(void) +{ + BYTE result[SYMCRYPT_KMAC256_RESULT_SIZE]; + static const unsigned char Sstr[] = { 'S' }; + SYMCRYPT_KMAC256_EXPANDED_KEY expandedKey; + + SymCryptKmac256ExpandKeyEx(&expandedKey, SymCryptTestKey32, 32, Sstr, sizeof(Sstr)); + + SymCryptKmac256(&expandedKey, SymCryptTestMsg16, sizeof(SymCryptTestMsg16), result); + + 
SymCryptInjectError(result, sizeof(result)); + + if (memcmp(result, kmac256KATAnswer, sizeof(result)) != 0) + { + SymCryptFatal('kmac'); + } +} diff --git a/libs/symcrypt/lib/kmac_pattern.c b/libs/symcrypt/lib/kmac_pattern.c new file mode 100644 index 00000000000..1b4aa0604bc --- /dev/null +++ b/libs/symcrypt/lib/kmac_pattern.c @@ -0,0 +1,218 @@ +// +// kmac_pattern.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#if 0 +#pragma makedep header +#endif + +// +// This source file implements KMAC128 and KMAC256 +// +// See the symcrypt.h file for documentation on what the various functions do. +// + + +// +// SymCryptKmac +// +VOID +SYMCRYPT_CALL +SYMCRYPT_Xxx( + _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbInput ) PCBYTE pbInput, + SIZE_T cbInput, + _Out_writes_bytes_( SYMCRYPT_KMACXXX_RESULT_SIZE ) PBYTE pbResult) +{ + SYMCRYPT_XXX_STATE state; + + SYMCRYPT_XxxInit(&state, pExpandedKey); + SYMCRYPT_XxxAppend(&state, pbInput, cbInput); + SYMCRYPT_XxxResult(&state, pbResult); +} + +// +// SymCryptKmacEx +// +VOID +SYMCRYPT_CALL +SYMCRYPT_XxxEx( + _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbInput ) PCBYTE pbInput, + SIZE_T cbInput, + _Out_writes_bytes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult) +{ + SYMCRYPT_XXX_STATE state; + + SYMCRYPT_XxxInit(&state, pExpandedKey); + SYMCRYPT_XxxAppend(&state, pbInput, cbInput); + SYMCRYPT_XxxResultEx(&state, pbResult, cbResult); +} + + +// +// SymCryptKmacExpandKey +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SYMCRYPT_XxxExpandKey( + _Out_ PSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_(cbKey) PCBYTE pbKey, + SIZE_T cbKey) +{ + return SYMCRYPT_XxxExpandKeyEx(pExpandedKey, pbKey, cbKey, NULL, 0); +} + +// +// SymCryptKmacExpandKeyEx +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SYMCRYPT_XxxExpandKeyEx( + _Out_ PSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey, + _In_reads_bytes_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_bytes_( cbCustomizationString ) 
PCBYTE pbCustomizationString, + SIZE_T cbCustomizationString) +{ + static const BYTE nameString[] = { 0x4b, 0x4d, 0x41, 0x43 }; // "KMAC" + + C_ASSERT( sizeof(SYMCRYPT_XXX_EXPANDED_KEY) == sizeof(SYMCRYPT_CSHAKEXXX_STATE) ); + + SYMCRYPT_CSHAKEXXX_INIT( (SYMCRYPT_CSHAKEXXX_STATE*)pExpandedKey, nameString, sizeof(nameString), pbCustomizationString, cbCustomizationString); + + SYMCRYPT_KECCAK_STATE* pks = &pExpandedKey->ks; + + // byte_pad( encode_string( K ) ) + SymCryptKeccakAppendEncodeTimes8(pks, pks->inputBlockSize / 8, TRUE); + SymCryptKeccakAppendEncodedString(pks, pbKey, cbKey); + + if (pks->stateIndex != 0) + { + SymCryptKeccakZeroAppendBlock(pks); + } + + return SYMCRYPT_NO_ERROR; +} + +// +// SymCryptKmacInit +// +VOID +SYMCRYPT_CALL +SYMCRYPT_XxxInit( + _Out_ PSYMCRYPT_XXX_STATE pState, + _In_ PCSYMCRYPT_XXX_EXPANDED_KEY pExpandedKey) +{ + C_ASSERT(sizeof(*pState) == sizeof(*pExpandedKey)); + + SYMCRYPT_CHECK_MAGIC(pExpandedKey); + memcpy(pState, pExpandedKey, sizeof(*pState)); + SYMCRYPT_SET_MAGIC(pState); +} + +// +// SymCryptKmacAppend +// +VOID +SYMCRYPT_CALL +SYMCRYPT_XxxAppend( + _Inout_ PSYMCRYPT_XXX_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SYMCRYPT_ASSERT(!pState->ks.squeezeMode); + SymCryptKeccakAppend(&pState->ks, pbData, cbData); +} + +// +// SymCryptKmacExtract +// +VOID +SYMCRYPT_CALL +SYMCRYPT_XxxExtract( + _Inout_ PSYMCRYPT_XXX_STATE pState, + _Out_writes_( cbOutput ) PBYTE pbOutput, + SIZE_T cbOutput, + BOOLEAN bWipe) +{ + // This function uses KMAC in XOF mode. + // + // If this is the first time Extract is being called, append right_encode(0) + // to indicate that we're in XOF mode. This padding will be applied only once + // as SymCryptKeccakExtract will transition the state to squeeze mode. 
+ if (!pState->ks.squeezeMode) + { + SymCryptKeccakAppendEncodeTimes8(&pState->ks, 0, FALSE); + } + + SymCryptKeccakExtract(&pState->ks, pbOutput, cbOutput, bWipe); +} + +// +// SymCryptKmacResult +// +VOID +SYMCRYPT_CALL +SYMCRYPT_XxxResult( + _Inout_ PSYMCRYPT_XXX_STATE pState, + _Out_writes_( SYMCRYPT_KMACXXX_RESULT_SIZE ) PBYTE pbOutput) +{ + SYMCRYPT_XxxResultEx(pState, pbOutput, SYMCRYPT_KMACXXX_RESULT_SIZE); +} + + +// +// SymCryptKmacResultEx +// +VOID +SYMCRYPT_CALL +SYMCRYPT_XxxResultEx( + _Inout_ PSYMCRYPT_XXX_STATE pState, + _Out_writes_( cbOutput ) PBYTE pbOutput, + SIZE_T cbOutput) +{ + // Result and ResultEx functions are used to extract data only once. + // KMAC requires the output length to be encoded and appended to the + // end of the input before the state switches to squeeze mode. + // + // If Result or ResultEx is called after an Extract call with bWipe=FALSE, + // this means KMAC was used in XOF mode and length padding has already been + // applied. In this case, Result and ResultEx functions extract data one last + // time in XOF mode and wipe the state afterwards. 
+ + if (!pState->ks.squeezeMode) + { + // Append right_encode(L) + SymCryptKeccakAppendEncodeTimes8(&pState->ks, cbOutput, FALSE); + } + + SymCryptKeccakExtract(&pState->ks, pbOutput, cbOutput, TRUE); +} + +// +// SymCryptKmacKeyCopy +// +VOID +SYMCRYPT_CALL +SYMCRYPT_XxxKeyCopy(_In_ PCSYMCRYPT_XXX_EXPANDED_KEY pSrc, _Out_ PSYMCRYPT_XXX_EXPANDED_KEY pDst) +{ + SYMCRYPT_CHECK_MAGIC(pSrc); + *pDst = *pSrc; + SYMCRYPT_SET_MAGIC(pDst); +} + +// +// SymCryptKmacStateCopy +// +VOID +SYMCRYPT_CALL +SYMCRYPT_XxxStateCopy(_In_ const SYMCRYPT_XXX_STATE* pSrc, _Out_ SYMCRYPT_XXX_STATE* pDst) +{ + SYMCRYPT_CHECK_MAGIC(pSrc); + *pDst = *pSrc; + SYMCRYPT_SET_MAGIC(pDst); +} diff --git a/libs/symcrypt/lib/libmain.c b/libs/symcrypt/lib/libmain.c new file mode 100644 index 00000000000..c25c56e6b3e --- /dev/null +++ b/libs/symcrypt/lib/libmain.c @@ -0,0 +1,539 @@ +// +// libmain.c +// General routines for the SymCrypt library +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#include "C_asm_shared.inc" + +#include "buildInfo.h" + +// The following global g_SymCryptFlags has to be at least 32 +// bits because the iOS environment has interlocked function +// support for variables of size at least 32 bits. +// The relevant function is OSAtomicOr32Barrier. +UINT32 g_SymCryptFlags = 0; + +SYMCRYPT_CPU_FEATURES g_SymCryptCpuFeaturesNotPresent = (SYMCRYPT_CPU_FEATURES) ~0; +SYMCRYPT_CPU_FEATURES g_SymCryptCpuFeaturesPresentCheck = 0; + +#if SYMCRYPT_DEBUG + +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptLibraryWasNotInitialized(void) +{ + SymCryptFatal( 'init' ); // Function name helps figure out what the problem is. 
+} + +#endif + +const CHAR * const SymCryptBuildString = + "v" SYMCRYPT_BUILD_INFO_VERSION + "_" SYMCRYPT_BUILD_INFO_BRANCH + "_" SYMCRYPT_BUILD_INFO_COMMIT + "_" SYMCRYPT_BUILD_INFO_TIMESTAMP; + +VOID +SYMCRYPT_CALL +SymCryptInitEnvCommon( UINT32 version ) +// Common initialization steps. Returns nothing; calls SymCryptFatal( 'apiv' ) on an API version mismatch. +{ + UINT32 tmp; + + const CHAR * p; + + // Assertion that verifies that the calling application was compiled with + // the same version header files as the library. + if( version != SYMCRYPT_API_VERSION ) + { + SymCryptFatal( 'apiv' ); + } + + // + // Use an interlocked operation to set the flag in case we add other flags + // that are modified by different threads. + // + SYMCRYPT_ATOMIC_OR32_PRE_RELAXED( &g_SymCryptFlags, SYMCRYPT_FLAG_LIB_INITIALIZED ); + + // + // Do a forced write of our code version. This ensures that the code + // version is part of the binary, so we can look at a binary and figure + // out which version of SymCrypt it was linked with. + // + SYMCRYPT_FORCE_WRITE32( &tmp, SYMCRYPT_API_VERSION ); + + // + // Force the build string to be in memory, because otherwise the + // compiler might get smart and remove it. + // This ensures we can always track back to the SymCrypt source code from + // any binary that links this library + // + for( p = SymCryptBuildString; *p!=0; p++ ) + { + SYMCRYPT_FORCE_WRITE8( (PBYTE) &tmp, *p ); + } + + // + // Make an inverted copy of the CPU detection results. + // This helps us diagnose corruption of our flags + // Force-write otherwise the compiler optimizes it away + // + SYMCRYPT_FORCE_WRITE32( &g_SymCryptCpuFeaturesPresentCheck, ~g_SymCryptCpuFeaturesNotPresent ); + + // + // Test that the C and assembler code agree on the various structure member offsets. + // This gets optimized away in FRE builds as all the values are compile-time computable. 
+ // +#define SYMCRYPT_CHECK_ASM_OFFSET( a, b ) if( (a) != (b) ) {SymCryptFatal( b );} + SYMCRYPT_CHECK_ASM_OFFSETS; +#undef SYMCRYPT_CHECK_ASM_OFFSET +} + +_Analysis_noreturn_ +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptFatalHang( UINT32 fatalCode ) +// +// This function is used by the environment-specific fatal code +// as a last resort when none of the other fatal methods work. +// +{ + UINT32 fcode; + + // + // Put the fatal code in a location we can find + // + SYMCRYPT_FORCE_WRITE32( &fcode, fatalCode ); + +fatalInfiniteLoop: + goto fatalInfiniteLoop; +} + +#if 0 /* SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 */ + +VOID +SYMCRYPT_CALL +SymCryptWipeAsm( _Out_writes_bytes_( cbData ) PVOID pbData, SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptWipe( _Out_writes_bytes_( cbData ) PVOID pbData, SIZE_T cbData ) +{ + SymCryptWipeAsm( pbData, cbData ); +} + +#else +// +// Generic but slow wipe routine. +// +VOID +SYMCRYPT_CALL +SymCryptWipe( _Out_writes_bytes_( cbData ) PVOID pbData, SIZE_T cbData ) +{ + volatile BYTE * p = (volatile BYTE *) pbData; + SIZE_T i; + + for( i=0; i<cbData; i++ ){ + p[i] = 0; + } + +} +#endif + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM +VOID +SYMCRYPT_CALL +SymCryptXorBytes( + _In_reads_( cbBytes ) PCBYTE pbSrc1, + _In_reads_( cbBytes ) PCBYTE pbSrc2, + _Out_writes_( cbBytes ) PBYTE pbResult, + SIZE_T cbBytes ) +{ + SIZE_T i; + + if( cbBytes == 16 ) + { + PCUINT32 s1 = (PCUINT32) pbSrc1; + PCUINT32 s2 = (PCUINT32) pbSrc2; + PUINT32 d = (PUINT32) pbResult; + + d[0] = s1[0] ^ s2[0]; + d[1] = s1[1] ^ s2[1]; + d[2] = s1[2] ^ s2[2]; + d[3] = s1[3] ^ s2[3]; + } + else + { + i = 0; + while( i + 3 < cbBytes ) + { + *(UINT32 *)&pbResult[i] = *(UINT32 *)&pbSrc1[i] ^ *(UINT32 *)&pbSrc2[i]; + i += 4; + } + + while( i < cbBytes ) + { + pbResult[i] = pbSrc1[i] ^ pbSrc2[i]; + i++; + } + } +} + +#elif SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 + +VOID +SYMCRYPT_CALL +SymCryptXorBytes( + _In_reads_( cbBytes ) PCBYTE 
pbSrc1, + _In_reads_( cbBytes ) PCBYTE pbSrc2, + _Out_writes_( cbBytes ) PBYTE pbResult, + SIZE_T cbBytes ) +{ + if( cbBytes == 16 ) + { + PCUINT64 s1 = (PCUINT64) pbSrc1; + PCUINT64 s2 = (PCUINT64) pbSrc2; + PUINT64 d = (PUINT64) pbResult; + + d[0] = s1[0] ^ s2[0]; + d[1] = s1[1] ^ s2[1]; + } + else + { + while( cbBytes >= 8 ) + { + *(UINT64 *)pbResult = *(UINT64 *)pbSrc1 ^ *(UINT64 *)pbSrc2; + pbSrc1 += 8; + pbSrc2 += 8; + pbResult += 8; + cbBytes -= 8; + } + + while( cbBytes > 0 ) + { + *pbResult = *pbSrc1 ^ *pbSrc2; + pbResult++; + pbSrc1++; + pbSrc2++; + cbBytes--; + } + } +} + + +#else +// +// Generic code +// +VOID +SYMCRYPT_CALL +SymCryptXorBytes( + _In_reads_( cbBytes ) PCBYTE pbSrc1, + _In_reads_( cbBytes ) PCBYTE pbSrc2, + _Out_writes_( cbBytes ) PBYTE pbResult, + SIZE_T cbBytes ) +{ + SIZE_T i; + + for( i=0; i<cbBytes; i++ ) + { + pbResult[i] = pbSrc1[i] ^ pbSrc2[i]; + } +} +#endif + + +// +// Generic LSB/MSBfirst load/store code for variable-sized buffers. +// These implementations are inefficient and not side-channel safe. +// This is sufficient for the current usage (typically to allow +// callers to read/write RSA public exponents from/to variable-sized +// buffers). +// Consider upgrading them in future. +// + +UINT32 +SymCryptUint32Bitsize( UINT32 value ) +// +// Some CPUs/compilers have intrinsics for this, +// but this is portable and works everywhere. 
+// +{ + UINT32 res; + + res = 0; + while( value != 0 ) + { + res += 1; + value >>= 1; + } + + return res; +} + +UINT32 +SymCryptUint64Bitsize( UINT64 value ) +{ + UINT32 res; + UINT32 upper; + + upper = (UINT32)(value >> 32); + + if( upper == 0 ) + { + res = SymCryptUint32Bitsize( (UINT32) value ); + } else { + res = 32 + SymCryptUint32Bitsize( upper ); + } + + return res; +} + +UINT32 +SymCryptUint32Bytesize( UINT32 value ) +{ + if( value == 0 ) + { + return 0; + } + if( value < 0x100 ) + { + return 1; + } + if( value < 0x10000 ) + { + return 2; + } + if( value < 0x1000000 ) + { + return 3; + } + return 4; +} + +UINT32 +SymCryptUint64Bytesize( UINT64 value ) +{ + UINT32 res; + UINT32 upper; + + upper = (UINT32)(value >> 32); + + if( upper == 0 ) + { + res = SymCryptUint32Bytesize( (UINT32) value ); + } else { + res = 4 + SymCryptUint32Bytesize( upper ); + } + + return res; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLoadLsbFirstUint32( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_ PUINT32 pDst ) +{ + UINT64 v64; + UINT32 v32; + SYMCRYPT_ERROR scError; + + scError = SymCryptLoadLsbFirstUint64( pbSrc, cbSrc, &v64 ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + v32 = (UINT32) v64; + if( v32 != v64 ) + { + scError = SYMCRYPT_VALUE_TOO_LARGE; + goto cleanup; + } + + *pDst = v32; + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLoadLsbFirstUint64( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_ PUINT64 pDst ) +{ + UINT64 v; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + v = 0; + pbSrc += cbSrc; + while( cbSrc > 8 ) + { + if( *--pbSrc != 0 ) + { + scError = SYMCRYPT_VALUE_TOO_LARGE; + goto cleanup; + } + cbSrc--; + } + + while( cbSrc > 0 ) + { + v = (v << 8) | *--pbSrc; + cbSrc--; + } + + *pDst = v; + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLoadMsbFirstUint32( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_ PUINT32 pDst ) +{ + UINT64 v64; + UINT32 
v32; + SYMCRYPT_ERROR scError; + + scError = SymCryptLoadMsbFirstUint64( pbSrc, cbSrc, &v64 ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + v32 = (UINT32) v64; + if( v32 != v64 ) + { + scError = SYMCRYPT_VALUE_TOO_LARGE; + goto cleanup; + } + + *pDst = v32; + +cleanup: + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLoadMsbFirstUint64( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_ PUINT64 pDst ) +{ + UINT64 v; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + v = 0; + while( cbSrc > 8 ) + { + if( *pbSrc++ != 0 ) + { + scError = SYMCRYPT_VALUE_TOO_LARGE; + goto cleanup; + } + cbSrc--; + } + + while( cbSrc > 0 ) + { + v = (v << 8) | *pbSrc++; + cbSrc--; + } + + *pDst = v; + +cleanup: + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptStoreLsbFirstUint32( + UINT32 src, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ) +{ + return SymCryptStoreLsbFirstUint64( src, pbDst, cbDst ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptStoreLsbFirstUint64( + UINT64 src, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + while( cbDst > 0 ) + { + *pbDst++ = (BYTE) src; + src >>= 8; + cbDst--; + } + + if( src != 0 ) + { + scError = SYMCRYPT_VALUE_TOO_LARGE; + goto cleanup; + } + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptStoreMsbFirstUint32( + UINT32 src, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ) +{ + return SymCryptStoreMsbFirstUint64( src, pbDst, cbDst ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptStoreMsbFirstUint64( + UINT64 src, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + pbDst += cbDst; + while( cbDst > 0 ) + { + *--pbDst = (BYTE) src; + src >>= 8; + cbDst--; + } + + if( src != 0 ) + { + scError = SYMCRYPT_VALUE_TOO_LARGE; + goto cleanup; + } + +cleanup: + return scError; +} diff --git a/libs/symcrypt/lib/lms.c b/libs/symcrypt/lib/lms.c new file mode 
100644 index 00000000000..50879e89f9e --- /dev/null +++ b/libs/symcrypt/lib/lms.c @@ -0,0 +1,1162 @@ +// +// lms.c LMS implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. +// +static const PCSYMCRYPT_HASH* LmsHashObjects[] = { + &SymCryptSha256Algorithm, // 0 + &SymCryptShake256HashAlgorithm, // 1 +}; + +typedef struct _SYMCRYPT_LMS_PARAMETER_PREDEFINED +{ + SYMCRYPT_LMS_ALGID lmsAlgId; + + // output length + UINT8 cbHashOutput; + + // total tree height + UINT8 nTreeHeight; + + // hash function index + UINT8 nHashIdx; + +} SYMCRYPT_LMS_PARAMETER_PREDEFINED, * PSYMCRYPT_LMS_PARAMETER_PREDEFINED; + +typedef const SYMCRYPT_LMS_PARAMETER_PREDEFINED* PCSYMCRYPT_LMS_PARAMETER_PREDEFINED; + + +static const SYMCRYPT_LMS_PARAMETER_PREDEFINED LmsParametersPredefined[] = { + + // algId m h HIdx + { SYMCRYPT_LMS_SHA256_M32_H5, 32, 5 , 0 }, + { SYMCRYPT_LMS_SHA256_M32_H10, 32, 10, 0 }, + { SYMCRYPT_LMS_SHA256_M32_H15, 32, 15, 0 }, + { SYMCRYPT_LMS_SHA256_M32_H20, 32, 20, 0 }, + { SYMCRYPT_LMS_SHA256_M32_H25, 32, 25, 0 }, + { SYMCRYPT_LMS_SHAKE_M32_H5, 32, 5 , 1 }, + { SYMCRYPT_LMS_SHAKE_M32_H10, 32, 10, 1 }, + { SYMCRYPT_LMS_SHAKE_M32_H15, 32, 15, 1 }, + { SYMCRYPT_LMS_SHAKE_M32_H20, 32, 20, 1 }, + { SYMCRYPT_LMS_SHAKE_M32_H25, 32, 25, 1 }, + { SYMCRYPT_LMS_SHA256_M24_H5, 24, 5 , 0 }, + { SYMCRYPT_LMS_SHA256_M24_H10, 24, 10, 0 }, + { SYMCRYPT_LMS_SHA256_M24_H15, 24, 15, 0 }, + { SYMCRYPT_LMS_SHA256_M24_H20, 24, 20, 0 }, + { SYMCRYPT_LMS_SHA256_M24_H25, 24, 25, 0 }, + { SYMCRYPT_LMS_SHAKE_M24_H5, 24, 5 , 1 }, + { SYMCRYPT_LMS_SHAKE_M24_H10, 24, 10, 1 }, + { SYMCRYPT_LMS_SHAKE_M24_H15, 24, 15, 1 }, + { SYMCRYPT_LMS_SHAKE_M24_H20, 24, 20, 1 }, + { SYMCRYPT_LMS_SHAKE_M24_H25, 24, 25, 1 }, +}; + +typedef struct _SYMCRYPT_LMS_OTS_PARAMETER_PREDEFINED +{ + SYMCRYPT_LMS_OTS_ALGID lmsOtsAlgId; + + // output length + UINT8 cbHashOutput; 
+ + // Winternitz width + UINT8 nWidth; + + // hash function index + UINT8 nHashIdx; + +} SYMCRYPT_LMS_OTS_PARAMETER_PREDEFINED, * PSYMCRYPT_LMS_OTS_PARAMETER_PREDEFINED; +typedef const SYMCRYPT_LMS_OTS_PARAMETER_PREDEFINED* PCSYMCRYPT_LMS_OTS_PARAMETER_PREDEFINED; + +static const SYMCRYPT_LMS_OTS_PARAMETER_PREDEFINED LmsOtsParametersPredefined[] = { + + // algId n w HIdx + { SYMCRYPT_LMS_OTS_SHA256_N32_W1, 32, 1, 0 }, + { SYMCRYPT_LMS_OTS_SHA256_N32_W2, 32, 2, 0 }, + { SYMCRYPT_LMS_OTS_SHA256_N32_W4, 32, 4, 0 }, + { SYMCRYPT_LMS_OTS_SHA256_N32_W8, 32, 8, 0 }, + { SYMCRYPT_LMS_OTS_SHAKE_N32_W1, 32, 1, 1 }, + { SYMCRYPT_LMS_OTS_SHAKE_N32_W2, 32, 2, 1 }, + { SYMCRYPT_LMS_OTS_SHAKE_N32_W4, 32, 4, 1 }, + { SYMCRYPT_LMS_OTS_SHAKE_N32_W8, 32, 8, 1 }, + { SYMCRYPT_LMS_OTS_SHA256_N24_W1, 24, 1, 0 }, + { SYMCRYPT_LMS_OTS_SHA256_N24_W2, 24, 2, 0 }, + { SYMCRYPT_LMS_OTS_SHA256_N24_W4, 24, 4, 0 }, + { SYMCRYPT_LMS_OTS_SHA256_N24_W8, 24, 8, 0 }, + { SYMCRYPT_LMS_OTS_SHAKE_N24_W1, 24, 1, 1 }, + { SYMCRYPT_LMS_OTS_SHAKE_N24_W2, 24, 2, 1 }, + { SYMCRYPT_LMS_OTS_SHAKE_N24_W4, 24, 4, 1 }, + { SYMCRYPT_LMS_OTS_SHAKE_N24_W8, 24, 8, 1 }, +}; +static const BYTE SYMCRYPT_LMS_D_PBLC[] = { 0x80, 0x80 }; +static const BYTE SYMCRYPT_LMS_D_MESG[] = { 0x81, 0x81 }; +static const BYTE SYMCRYPT_LMS_D_LEAF[] = { 0x82, 0x82 }; +static const BYTE SYMCRYPT_LMS_D_INTR[] = { 0x83, 0x83 }; + +static +VOID +LmsHashMessage( + _In_ PCSYMCRYPT_HASH pHash, + _In_reads_bytes_(SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE) PCBYTE pbId, + _In_reads_bytes_(sizeof(UINT32)) PCBYTE pbLeafNumber, + _In_reads_bytes_(cbRandomizer) PCBYTE pbRandomizer, + SIZE_T cbRandomizer, + _In_reads_bytes_(cbMessage) PCBYTE pbMessage, + SIZE_T cbMessage, + _Out_writes_bytes_(cbOut) PBYTE pbOut, + SIZE_T cbOut) +{ + SYMCRYPT_HASH_STATE state = { 0 }; + + SymCryptHashInit(pHash, &state); + SymCryptHashAppend(pHash, &state, pbId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &state, pbLeafNumber, sizeof(UINT32)); + 
SymCryptHashAppend(pHash, &state, SYMCRYPT_LMS_D_MESG, sizeof(SYMCRYPT_LMS_D_MESG)); + SymCryptHashAppend(pHash, &state, pbRandomizer, cbRandomizer); + SymCryptHashAppend(pHash, &state, pbMessage, cbMessage); + SymCryptHashResult(pHash, &state, pbOut, cbOut); +} + +static +VOID +SYMCRYPT_CALL +LmsOtskeyComputePrivate( + _In_ PCSYMCRYPT_LMS_KEY pKey, + _In_ UINT32 nLeafNumber, + _In_ UINT32 nPIdx, + _Out_writes_bytes_(pKey->params.cbHashOutput) + PBYTE pbOtsPrivateKey) +{ + UINT32 cbHashOutput = pKey->params.cbHashOutput; + PCSYMCRYPT_HASH pHash = pKey->params.pLmsHashFunction; + SYMCRYPT_HASH_STATE state = { 0 }; + BYTE abTemp[sizeof(UINT32) + 3] = { 0 }; // sizeof(UINT32) for nLeafNumber, 2 bytes of nPIdx and 1 byte of 0xff + + SYMCRYPT_ASSERT(nLeafNumber <= (((UINT32)1 << pKey->params.nTreeHeight) - 1)); + + SYMCRYPT_STORE_MSBFIRST32(abTemp, nLeafNumber); + SYMCRYPT_STORE_MSBFIRST16(abTemp + sizeof(UINT32), (UINT16)nPIdx); + abTemp[sizeof(UINT32) + 2] = 0xff; + + SymCryptHashInit(pHash, &state); + SymCryptHashAppend(pHash, &state, pKey->abId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &state, abTemp, sizeof(abTemp)); + SymCryptHashAppend(pHash, &state, pKey->abSeed, cbHashOutput); + SymCryptHashResult(pHash, &state, pbOtsPrivateKey, cbHashOutput); +} + +static +VOID +SYMCRYPT_CALL +LmskeyWipe( + _Inout_ PSYMCRYPT_LMS_KEY pKey) +{ + SYMCRYPT_CHECK_MAGIC(pKey); + + SymCryptWipeKnownSize(pKey->abSeed, sizeof(pKey->abSeed)); + SymCryptWipeKnownSize(pKey->abPublicRoot, sizeof(pKey->abPublicRoot)); + SymCryptWipeKnownSize(pKey->abId, sizeof(pKey->abId)); + pKey->nNextUnusedLeaf = 0; + pKey->keyType = SYMCRYPT_LMSKEY_TYPE_NONE; +} + +static +UINT16 +LmsOtsCalculateChecksum( + _In_reads_bytes_(cbString) PCBYTE pbString, + UINT32 cbString, + UINT32 nWidth, + UINT32 nLeftShift) +{ + UINT32 sum = 0; + UINT32 max = (1 << nWidth) - 1; + SYMCRYPT_ASSERT(SYMCRYPT_IS_VALID_WINTERNITZ_WIDTH(nWidth)); + + for (UINT32 i = 0; i < (cbString * 8 / nWidth); i = 
i + 1) + { + sum = sum + max - SymCryptHbsGetDigit(nWidth, pbString, cbString, i); + } + return (UINT16)(sum << nLeftShift); +} + +static +SIZE_T +SYMCRYPT_CALL +LmsOtsSizeofSignatureFromParams( + _In_ PCSYMCRYPT_LMS_PARAMS pParams) +{ + UINT32 n = pParams->cbHashOutput; + UINT32 p = pParams->nByteStringCount; + SIZE_T size = 0; + + size += sizeof(UINT32); // type + size += n; // randomizer + size += p * n; // y[0..p-1] + + return size; +} + +static +VOID +SYMCRYPT_CALL +LmsOtskeySign( + _In_ PSYMCRYPT_LMS_KEY pKey, + UINT64 nLeafNumber, + _In_reads_bytes_(cbMessage) PCBYTE pbMessage, + SIZE_T cbMessage, + _In_reads_bytes_(pKey->params.cbHashOutput) PCBYTE pbRandomizer, + _Out_writes_bytes_(cbSignature) PBYTE pbSignature, + SIZE_T cbSignature) +{ + PCSYMCRYPT_HASH pHash = pKey->params.pLmsHashFunction; + SYMCRYPT_HASH_STATE state = { 0 }; + UINT32 nIndex = 0; + UINT32 cbHashOutput = pKey->params.cbHashOutput; + UINT32 nWinternitzChainWidth = pKey->params.nWinternitzChainWidth; + SIZE_T cbRemainingBytes = cbSignature; + UINT16 nChecksum = 0; + BYTE en32LeafNumber[sizeof(UINT32)] = {0}; + BYTE en16Index[sizeof(UINT16)] = { 0 }; + BYTE abOtsPrivateKey[SYMCRYPT_LMS_MAX_N] = { 0 }; + BYTE abLmsHashedMessage[SYMCRYPT_LMS_MAX_N + sizeof(nChecksum)] = { 0 }; + PBYTE pbDest = pbSignature; + + SYMCRYPT_ASSERT(cbSignature == LmsOtsSizeofSignatureFromParams(&pKey->params)); + + SYMCRYPT_STORE_MSBFIRST32(pbDest, pKey->params.lmsOtsAlgID); + pbDest += sizeof(UINT32); + cbRemainingBytes -= sizeof(UINT32); + + memcpy(pbDest, pbRandomizer, cbHashOutput); + pbDest += cbHashOutput; + cbRemainingBytes -= cbHashOutput; + + SYMCRYPT_STORE_MSBFIRST32(en32LeafNumber, (UINT32)nLeafNumber); + LmsHashMessage(pHash, pKey->abId, en32LeafNumber, pbRandomizer, cbHashOutput, pbMessage, cbMessage, abLmsHashedMessage, cbHashOutput); + + nChecksum = LmsOtsCalculateChecksum(abLmsHashedMessage, cbHashOutput, nWinternitzChainWidth, pKey->params.nChecksumLShiftBits); + 
SYMCRYPT_STORE_MSBFIRST16((UINT16*)&abLmsHashedMessage[cbHashOutput], nChecksum); + + SymCryptHashInit(pHash, &state); + for (nIndex = 0; nIndex < pKey->params.nByteStringCount; nIndex++) + { + BYTE coeff = (BYTE)SymCryptHbsGetDigit(nWinternitzChainWidth, abLmsHashedMessage, cbHashOutput + sizeof(nChecksum), nIndex); + LmsOtskeyComputePrivate(pKey, (UINT32)nLeafNumber, nIndex, abOtsPrivateKey); + + SYMCRYPT_STORE_MSBFIRST16(en16Index, (UINT16)nIndex); + + for (BYTE j = 0; j < coeff; j++) + { + SymCryptHashAppend(pHash, &state, pKey->abId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &state, en32LeafNumber, sizeof(UINT32)); + SymCryptHashAppend(pHash, &state, en16Index, sizeof(UINT16)); + SymCryptHashAppend(pHash, &state, &j, 1); + SymCryptHashAppend(pHash, &state, abOtsPrivateKey, cbHashOutput); + SymCryptHashResult(pHash, &state, abOtsPrivateKey, cbHashOutput); + } + memcpy(pbDest, abOtsPrivateKey, cbHashOutput); + pbDest += cbHashOutput; + cbRemainingBytes -= cbHashOutput; + } + SYMCRYPT_ASSERT(cbRemainingBytes == 0); + + return; +} + +static +VOID +SYMCRYPT_CALL +LmsOtskeyComputePublic( + _In_ PCSYMCRYPT_LMS_KEY pKey, + UINT32 nNodeIdx, + _Out_writes_bytes_(pKey->params.cbHashOutput) PBYTE pbK) +{ + UINT32 cbHashOutput = pKey->params.cbHashOutput; + UINT32 maxJ = (1 << pKey->params.nWinternitzChainWidth) - 1; + PCSYMCRYPT_HASH pHash = pKey->params.pLmsHashFunction; + SYMCRYPT_HASH_STATE statePriv = { 0 }; + SYMCRYPT_HASH_STATE statePub = { 0 }; + BYTE en32LeafNumber[sizeof(UINT32)] = { 0 }; + BYTE en16Index[sizeof(UINT16)] = { 0 }; + BYTE abNode[SYMCRYPT_LMS_MAX_N] = { 0 }; + + SYMCRYPT_STORE_MSBFIRST32(en32LeafNumber, nNodeIdx); + + SymCryptHashInit(pHash, &statePub); + SymCryptHashAppend(pHash, &statePub, pKey->abId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &statePub, en32LeafNumber, sizeof(UINT32)); + SymCryptHashAppend(pHash, &statePub, SYMCRYPT_LMS_D_PBLC, sizeof(SYMCRYPT_LMS_D_PBLC)); + + 
SymCryptHashInit(pHash, &statePriv); + for (UINT32 i = 0; i < pKey->params.nByteStringCount; i++) + { + LmsOtskeyComputePrivate(pKey, nNodeIdx, i, abNode); + SYMCRYPT_STORE_MSBFIRST16(en16Index, (UINT16)i); + + for (BYTE j = 0; j < maxJ; j++) + { + SymCryptHashAppend(pHash, &statePriv, pKey->abId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &statePriv, en32LeafNumber, sizeof(UINT32)); + SymCryptHashAppend(pHash, &statePriv, en16Index, sizeof(UINT16)); + SymCryptHashAppend(pHash, &statePriv, &j, 1); + SymCryptHashAppend(pHash, &statePriv, abNode, cbHashOutput); + SymCryptHashResult(pHash, &statePriv, abNode, cbHashOutput); + } + SymCryptHashAppend(pHash, &statePub, abNode, cbHashOutput); + } + SymCryptHashResult(pHash, &statePub, pbK, cbHashOutput); +} + +static +VOID +SYMCRYPT_CALL +LmsComputeNodeValue( + _In_ PCSYMCRYPT_LMS_KEY pKey, + UINT32 nIndex, + _Out_writes_bytes_(pKey->params.cbHashOutput) PBYTE pbNodeValue, + SIZE_T cbNodeValue) +{ + UNREFERENCED_PARAMETER(cbNodeValue); + + UINT32 cbHashOutput = pKey->params.cbHashOutput; + UINT32 nInternalNodes = (UINT32)1 << pKey->params.nTreeHeight; + PCSYMCRYPT_HASH pHash = pKey->params.pLmsHashFunction; + SYMCRYPT_HASH_STATE state = { 0 }; + BYTE abTemp[SYMCRYPT_LMS_MAX_N] = { 0 }; + BYTE en32Index[sizeof(UINT32)] = { 0 }; + BYTE abOtsPubKey[SYMCRYPT_LMS_MAX_N] = { 0 }; + + SYMCRYPT_ASSERT(nIndex > 0); + SYMCRYPT_ASSERT(cbNodeValue == cbHashOutput); + + SYMCRYPT_STORE_MSBFIRST32(en32Index, nIndex); + + SymCryptHashInit(pHash, &state); + SymCryptHashAppend(pHash, &state, pKey->abId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &state, en32Index, sizeof(UINT32)); + if (nIndex >= nInternalNodes) + { + LmsOtskeyComputePublic(pKey, nIndex - nInternalNodes, abOtsPubKey); + + SymCryptHashAppend(pHash, &state, SYMCRYPT_LMS_D_LEAF, sizeof(SYMCRYPT_LMS_D_LEAF)); + SymCryptHashAppend(pHash, &state, abOtsPubKey, cbHashOutput); + } + else + { + SymCryptHashAppend(pHash, &state, 
SYMCRYPT_LMS_D_INTR, sizeof(SYMCRYPT_LMS_D_INTR)); + + LmsComputeNodeValue(pKey, 2 * nIndex, abTemp, cbHashOutput); + SymCryptHashAppend(pHash, &state, abTemp, cbHashOutput); + SymCryptWipeKnownSize(abTemp, SYMCRYPT_LMS_MAX_N); + + LmsComputeNodeValue(pKey, 2 * nIndex + 1, abTemp, cbHashOutput); + SymCryptHashAppend(pHash, &state, abTemp, cbHashOutput); + } + SymCryptHashResult(pHash, &state, pbNodeValue, cbHashOutput); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsParamsFromAlgId( + SYMCRYPT_LMS_ALGID lmsAlgID, + SYMCRYPT_LMS_OTS_ALGID lmsOtsAlgID, + _Out_ PSYMCRYPT_LMS_PARAMS pParams) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SIZE_T uLmsParametersCount = 0; + SIZE_T uLmsOtsParametersCount = 0; + UINT32 u = 0; + UINT32 v = 0; + PCSYMCRYPT_LMS_PARAMETER_PREDEFINED pLmsParameters = NULL; + PCSYMCRYPT_LMS_OTS_PARAMETER_PREDEFINED pLmsOtsParameters = NULL; + BOOL bFound = FALSE; + + SymCryptWipeKnownSize(pParams, sizeof(*pParams)); + pLmsOtsParameters = LmsOtsParametersPredefined; + uLmsOtsParametersCount = SYMCRYPT_ARRAY_SIZE(LmsOtsParametersPredefined); + pLmsParameters = LmsParametersPredefined; + uLmsParametersCount = SYMCRYPT_ARRAY_SIZE(LmsParametersPredefined); + + for (UINT32 i = 0; i < uLmsParametersCount; i++) + { + if (pLmsParameters[i].lmsAlgId == lmsAlgID) + { + pParams->lmsAlgID = lmsAlgID; + pParams->nTreeHeight = pLmsParameters[i].nTreeHeight; + SYMCRYPT_ASSERT(pParams->nTreeHeight <= SYMCRYPT_LMS_MAX_CUSTOM_TREE_HEIGHT); + + pParams->cbHashOutput = pLmsParameters[i].cbHashOutput; + + SYMCRYPT_ASSERT(pLmsParameters[i].nHashIdx < SYMCRYPT_ARRAY_SIZE(LmsHashObjects)); + pParams->pLmsHashFunction = *LmsHashObjects[pLmsParameters[i].nHashIdx]; + bFound = TRUE; + break; + } + } + if (!bFound) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + bFound = FALSE; + for (UINT32 i = 0; i < uLmsOtsParametersCount; i++) + { + if (pLmsOtsParameters[i].lmsOtsAlgId == lmsOtsAlgID) + { + SYMCRYPT_ASSERT(pLmsOtsParameters[i].nHashIdx < 
SYMCRYPT_ARRAY_SIZE(LmsHashObjects)); + + if (pParams->pLmsHashFunction != *LmsHashObjects[pLmsOtsParameters[i].nHashIdx] || + pParams->cbHashOutput != pLmsOtsParameters[i].cbHashOutput) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + pParams->lmsOtsAlgID = lmsOtsAlgID; + pParams->nWinternitzChainWidth = pLmsOtsParameters[i].nWidth; + SymCryptHbsGetWinternitzLengths( + pParams->cbHashOutput, + pParams->nWinternitzChainWidth, + &u, + &v); + SYMCRYPT_ASSERT((v * pParams->nWinternitzChainWidth) <= SYMCRYPT_LMS_CHECKSUM_SIZE); + pParams->nChecksumLShiftBits = SYMCRYPT_LMS_CHECKSUM_SIZE - (v * pParams->nWinternitzChainWidth); + pParams->nByteStringCount = u + v; + bFound = TRUE; + break; + } + } + if (!bFound) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + +cleanup: + return scError; +} + +SIZE_T +SYMCRYPT_CALL +SymCryptLmsSizeofSignatureFromParams( + _In_ PCSYMCRYPT_LMS_PARAMS pParams) +{ + SIZE_T size = 0; + + size += sizeof(UINT32); // q + size += LmsOtsSizeofSignatureFromParams(pParams); // LMS-OTS signature + size += sizeof(UINT32); // type + size += pParams->nTreeHeight * pParams->cbHashOutput; // path[0..h-1] + return size; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsSetParams( + _Out_ PSYMCRYPT_LMS_PARAMS pParams, + UINT32 lmsAlgID, + UINT32 lmsOtsAlgID, + _In_ PCSYMCRYPT_HASH pLmsHashFunction, + UINT32 cbHashOutput, + UINT32 nTreeHeight, + UINT32 nWinternitzChainWidth) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 u = 0; + UINT32 v = 0; + + SymCryptWipeKnownSize(pParams, sizeof(*pParams)); + + // nTreeHeight must be positive and maximum SYMCRYPT_LMS_MAX_CUSTOM_TREE_HEIGHT + if (nTreeHeight == 0 || nTreeHeight > SYMCRYPT_LMS_MAX_CUSTOM_TREE_HEIGHT) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Output cbHashOutput cannot be larger than the hash output size or SYMCRYPT_LMS_MAX_N + if (cbHashOutput == 0 || cbHashOutput > pLmsHashFunction->resultSize || cbHashOutput > SYMCRYPT_LMS_MAX_N) + { 
+ scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Winternitz parameter must be one of 1, 2, 4, or 8 + if (!SYMCRYPT_IS_VALID_WINTERNITZ_WIDTH(nWinternitzChainWidth)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pParams->lmsAlgID = lmsAlgID; + pParams->lmsOtsAlgID = lmsOtsAlgID; + pParams->pLmsHashFunction = pLmsHashFunction; + pParams->nTreeHeight = nTreeHeight; + pParams->cbHashOutput = cbHashOutput; + pParams->nWinternitzChainWidth = nWinternitzChainWidth; + SymCryptHbsGetWinternitzLengths( + pParams->cbHashOutput, + pParams->nWinternitzChainWidth, + &u, + &v); + pParams->nChecksumLShiftBits = SYMCRYPT_LMS_CHECKSUM_SIZE - (v * pParams->nWinternitzChainWidth); + pParams->nByteStringCount = u + v; + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsSizeofKeyBlobFromParams( + _In_ PCSYMCRYPT_LMS_PARAMS pParams, + SYMCRYPT_LMSKEY_TYPE keyType, + _Out_ SIZE_T* pcbKey) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + switch (keyType) + { + case SYMCRYPT_LMSKEY_TYPE_PUBLIC: + *pcbKey = SYMCRYPT_LMS_PUB_KEY_SIZE(pParams->cbHashOutput); + break; + + case SYMCRYPT_LMSKEY_TYPE_PRIVATE: + *pcbKey = SYMCRYPT_LMS_PRIV_KEY_SIZE(pParams->cbHashOutput); + break; + + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + break; + } + + return scError; +} + +PSYMCRYPT_LMS_KEY +SYMCRYPT_CALL +SymCryptLmskeyAllocate( + _In_ PCSYMCRYPT_LMS_PARAMS pParams, + UINT32 flags) +{ + PSYMCRYPT_LMS_KEY pKey = NULL; + SIZE_T cbSize = sizeof(SYMCRYPT_LMS_KEY); + + if (flags != 0) + { + goto cleanup; + } + + pKey = SymCryptCallbackAlloc(cbSize); + if (pKey == NULL) + { + goto cleanup; + } + + SymCryptWipe(pKey, cbSize); + pKey->cbSize = cbSize; + + memcpy(&pKey->params, pParams, sizeof(*pParams)); + SYMCRYPT_SET_MAGIC(pKey); + +cleanup: + return pKey; +} + +VOID +SYMCRYPT_CALL +SymCryptLmskeyFree( + _Inout_ PSYMCRYPT_LMS_KEY pKey) +{ + SYMCRYPT_CHECK_MAGIC(pKey); + + SymCryptWipeKnownSize(pKey, sizeof(*pKey)); + 
SymCryptCallbackFree(pKey); +} + +static +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmskeyVerifyRoot( + _In_ PCSYMCRYPT_LMS_KEY pKey) +{ + BYTE abPublicRoot[SYMCRYPT_LMS_MAX_N] = { 0 }; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_CHECK_MAGIC(pKey); + + // key to be verified has to be a private key + if (pKey->keyType != SYMCRYPT_LMSKEY_TYPE_PRIVATE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // compute the public root from the private key, root node has index 1 + LmsComputeNodeValue( + pKey, + 1, + abPublicRoot, + pKey->params.cbHashOutput); + + if (!SymCryptEqual(abPublicRoot, pKey->abPublicRoot, pKey->params.cbHashOutput)) + { + scError = SYMCRYPT_HBS_PUBLIC_ROOT_MISMATCH; + } + +cleanup: + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmskeyGenerate( + _Inout_ PSYMCRYPT_LMS_KEY pKey, + UINT32 flags) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_CHECK_MAGIC(pKey); + + if (flags != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pKey->nNextUnusedLeaf = 0; + // Set the LMS key identifier I + scError = SymCryptCallbackRandom(pKey->abId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Set the private key Seed value + scError = SymCryptCallbackRandom(pKey->abSeed, pKey->params.cbHashOutput); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // compute the public root from the private key + LmsComputeNodeValue( + pKey, + 1, + pKey->abPublicRoot, + pKey->params.cbHashOutput); + + pKey->keyType = SYMCRYPT_LMSKEY_TYPE_PRIVATE; + +cleanup: + if (scError != SYMCRYPT_NO_ERROR) + { + LmskeyWipe(pKey); + } + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmskeyGetValue( + _In_ PCSYMCRYPT_LMS_KEY pKey, + SYMCRYPT_LMSKEY_TYPE keyType, + UINT32 flags, + _Out_writes_bytes_(cbBlob) PBYTE pbBlob, + SIZE_T cbBlob) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SIZE_T cbHashOutput = 
pKey->params.cbHashOutput; + SIZE_T cbKey = 0; + + SYMCRYPT_CHECK_MAGIC(pKey); + + if (flags != 0 || + (keyType != SYMCRYPT_LMSKEY_TYPE_PRIVATE && + keyType != SYMCRYPT_LMSKEY_TYPE_PUBLIC)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if ((keyType == SYMCRYPT_LMSKEY_TYPE_PRIVATE) && (pKey->keyType == SYMCRYPT_LMSKEY_TYPE_PUBLIC)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptLmsSizeofKeyBlobFromParams(&pKey->params, keyType, &cbKey); + if (cbBlob != cbKey) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + SYMCRYPT_STORE_MSBFIRST32(pbBlob, (UINT32)pKey->params.lmsAlgID); + pbBlob += sizeof(UINT32); + + SYMCRYPT_STORE_MSBFIRST32(pbBlob, (UINT32)pKey->params.lmsOtsAlgID); + pbBlob += sizeof(UINT32); + + memcpy(pbBlob, pKey->abId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + pbBlob += SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE; + + memcpy(pbBlob, pKey->abPublicRoot, cbHashOutput); + pbBlob += cbHashOutput; + + if (keyType == SYMCRYPT_LMSKEY_TYPE_PRIVATE) + { + + SYMCRYPT_ASSERT((pKey->nNextUnusedLeaf & 0xFFFFFFFF00000000) == 0); + + SYMCRYPT_STORE_MSBFIRST32(pbBlob, (UINT32)pKey->nNextUnusedLeaf); + pbBlob += sizeof(UINT32); + + memcpy(pbBlob, pKey->abSeed, cbHashOutput); + } + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmskeySetValue( + _In_reads_bytes_(cbBlob) PCBYTE pbBlob, + SIZE_T cbBlob, + SYMCRYPT_LMSKEY_TYPE keyType, + UINT32 flags, + _Inout_ PSYMCRYPT_LMS_KEY pKey) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 lmsAlgID = 0; + UINT32 lmsOtsAlgID = 0; + SIZE_T cbKey = 0; + + SYMCRYPT_ASSERT(keyType == SYMCRYPT_LMSKEY_TYPE_PUBLIC || keyType == SYMCRYPT_LMSKEY_TYPE_PRIVATE); + + SYMCRYPT_CHECK_MAGIC(pKey); + + if (flags & (~SYMCRYPT_FLAG_LMSKEY_VERIFY_ROOT)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Public key validation can only be performed for private keys + if ((flags & SYMCRYPT_FLAG_LMSKEY_VERIFY_ROOT) != 0 && + keyType != 
SYMCRYPT_LMSKEY_TYPE_PRIVATE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptLmsSizeofKeyBlobFromParams(&pKey->params, keyType, &cbKey); + if (cbBlob != cbKey) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + lmsAlgID = SYMCRYPT_LOAD_MSBFIRST32(pbBlob); + pbBlob += sizeof(UINT32); + + lmsOtsAlgID = SYMCRYPT_LOAD_MSBFIRST32(pbBlob); + pbBlob += sizeof(UINT32); + + // check if the lmsAlgID and lmsOtsAlgID matches the ones in the key + if (lmsAlgID != pKey->params.lmsAlgID || lmsOtsAlgID != pKey->params.lmsOtsAlgID) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + SymCryptWipeKnownSize(pKey->abPublicRoot, sizeof(pKey->abPublicRoot)); + SymCryptWipeKnownSize(pKey->abId, sizeof(pKey->abId)); + + pKey->keyType = keyType; + + memcpy(pKey->abId, pbBlob, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + pbBlob += SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE; + + memcpy(pKey->abPublicRoot, pbBlob, pKey->params.cbHashOutput); + pbBlob += pKey->params.cbHashOutput; + + if (keyType == SYMCRYPT_LMSKEY_TYPE_PRIVATE) + { + // Wipe private key material + pKey->nNextUnusedLeaf = 0; + SymCryptWipeKnownSize(pKey->abSeed, sizeof(pKey->abSeed)); + + pKey->nNextUnusedLeaf = SYMCRYPT_LOAD_MSBFIRST32(pbBlob); + pbBlob += sizeof(UINT32); + + memcpy(pKey->abSeed, pbBlob,pKey->params.cbHashOutput); + + if (flags & SYMCRYPT_FLAG_LMSKEY_VERIFY_ROOT) + { + scError = SymCryptLmskeyVerifyRoot(pKey); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + } + +cleanup: + if (scError != SYMCRYPT_NO_ERROR) + { + LmskeyWipe(pKey); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsSign( + _Inout_ PSYMCRYPT_LMS_KEY pKey, + _In_reads_bytes_(cbMessage) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _Out_writes_bytes_(cbSignature) PBYTE pbSignature, + SIZE_T cbSignature) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 nLeafNumber = (UINT32)pKey->nNextUnusedLeaf; + UINT32 cbHashOutput = pKey->params.cbHashOutput; + 
UINT32 nTreeHeight = pKey->params.nTreeHeight; + SIZE_T cbRemainingBytes = cbSignature; + UINT32 nLeavesCount = ((UINT32)1 << nTreeHeight); + UINT32 nNodeIndex = 0; + UINT32 nTemp = 0; + SIZE_T cbOtsSignature = LmsOtsSizeofSignatureFromParams(&pKey->params); + BYTE abLMSRandomizer[SYMCRYPT_LMS_MAX_N] = { 0 }; + + SYMCRYPT_CHECK_MAGIC(pKey); + + if (flags != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (pKey->keyType != SYMCRYPT_LMSKEY_TYPE_PRIVATE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (cbSignature != SymCryptLmsSizeofSignatureFromParams(&pKey->params)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + scError = SymCryptCallbackRandom(abLMSRandomizer, cbHashOutput); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + nLeafNumber = (UINT32)SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(&pKey->nNextUnusedLeaf, 1) - 1; + if (nLeafNumber >= (nLeavesCount)) + { + scError = SYMCRYPT_HBS_NO_OTS_KEYS_LEFT; + pKey->nNextUnusedLeaf = nLeavesCount; + goto cleanup; + } + SYMCRYPT_STORE_MSBFIRST32(pbSignature, nLeafNumber); + pbSignature += sizeof(UINT32); + cbRemainingBytes -= sizeof(UINT32); + + LmsOtskeySign( + pKey, + nLeafNumber, + pbMessage, + cbMessage, + abLMSRandomizer, + pbSignature, + cbOtsSignature); + pbSignature += cbOtsSignature; + cbRemainingBytes -= cbOtsSignature; + + SYMCRYPT_STORE_MSBFIRST32(pbSignature, pKey->params.lmsAlgID); + pbSignature += sizeof(UINT32); + cbRemainingBytes -= sizeof(UINT32); + + nNodeIndex = nLeavesCount + nLeafNumber; + // write the path into the signature + for (UINT32 nIndex = 0; nIndex < nTreeHeight; nIndex++) + { + nTemp = (nNodeIndex >> nIndex) ^ 1; + LmsComputeNodeValue( + pKey, + nTemp, + pbSignature, + cbHashOutput); + pbSignature += cbHashOutput; + cbRemainingBytes -= cbHashOutput; + } + SYMCRYPT_ASSERT(cbRemainingBytes == 0); + +cleanup: + return scError; +} + +static +SYMCRYPT_ERROR +SYMCRYPT_CALL +LmsComputeOtsPubKeyCandidate( + UINT32 
nLeafNumber, + _In_reads_bytes_(cbMessage) PCBYTE pbMessage, + SIZE_T cbMessage, + _In_reads_bytes_(cbOtsSignature) PCBYTE pbOtsSignature, + SIZE_T cbOtsSignature, + _In_reads_bytes_(SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE) PCBYTE pbId, + _In_ PCSYMCRYPT_LMS_PARAMS pSigParams, + _Out_writes_bytes_(pSigParams->cbHashOutput) PBYTE pbOtsPubKeyCandidate) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 cbHashOutput = pSigParams->cbHashOutput; + UINT32 nWinternitzChainWidth = pSigParams->nWinternitzChainWidth; + UINT32 nByteStringCount = pSigParams->nByteStringCount; + UINT32 nSigType = 0; + UINT32 nMaxJ = (1 << nWinternitzChainWidth) - 1; + UINT16 nCksm = 0; + PCSYMCRYPT_HASH pHash = pSigParams->pLmsHashFunction; + SYMCRYPT_HASH_STATE state = { 0 }; + SYMCRYPT_HASH_STATE stateKc = { 0 }; + BYTE en32LeafNumber[sizeof(UINT32)] = { 0 }; + BYTE en16Index[sizeof(UINT16)] = { 0 }; + BYTE abLmsHashedMsg[SYMCRYPT_LMS_MAX_N + sizeof(nCksm)] = { 0 }; + BYTE abTmpRes[SYMCRYPT_LMS_MAX_N] = { 0 }; + PCBYTE pbRandomizer = NULL; + + if (cbOtsSignature != LmsOtsSizeofSignatureFromParams(pSigParams)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + nSigType = SYMCRYPT_LOAD_MSBFIRST32(pbOtsSignature); + pbOtsSignature += sizeof(UINT32); + if (nSigType != pSigParams->lmsOtsAlgID) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pbRandomizer = pbOtsSignature; + pbOtsSignature += cbHashOutput; + + SYMCRYPT_STORE_MSBFIRST32(en32LeafNumber, nLeafNumber); + + LmsHashMessage(pHash, pbId, en32LeafNumber, pbRandomizer, cbHashOutput, pbMessage, cbMessage, abLmsHashedMsg, cbHashOutput); + nCksm = LmsOtsCalculateChecksum(abLmsHashedMsg, cbHashOutput, nWinternitzChainWidth, pSigParams->nChecksumLShiftBits); + SYMCRYPT_STORE_MSBFIRST16((UINT16*)&abLmsHashedMsg[cbHashOutput], (UINT16)nCksm); + + SymCryptHashInit(pHash, &stateKc); + SymCryptHashAppend(pHash, &stateKc, pbId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &stateKc, 
en32LeafNumber, sizeof(UINT32)); + SymCryptHashAppend(pHash, &stateKc, SYMCRYPT_LMS_D_PBLC, sizeof(SYMCRYPT_LMS_D_PBLC)); + + SymCryptHashInit(pHash, &state); + for (UINT32 i = 0; i < nByteStringCount; i++) + { + BYTE a = (BYTE)SymCryptHbsGetDigit(nWinternitzChainWidth, abLmsHashedMsg, cbHashOutput + sizeof(nCksm), i); + PCBYTE tmp = pbOtsSignature + (i * cbHashOutput); + + SYMCRYPT_STORE_MSBFIRST16(en16Index, (UINT16)i); + + for (BYTE j = a; j < nMaxJ; j++) + { + SymCryptHashAppend(pHash, &state, pbId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &state, en32LeafNumber, sizeof(UINT32)); + SymCryptHashAppend(pHash, &state, en16Index, sizeof(UINT16)); + SymCryptHashAppend(pHash, &state, &j, 1); + SymCryptHashAppend(pHash, &state, tmp, cbHashOutput); + SymCryptHashResult(pHash, &state, abTmpRes, cbHashOutput); + tmp = abTmpRes; + } + SymCryptHashAppend(pHash, &stateKc, tmp, cbHashOutput); + } + SymCryptHashResult(pHash, &stateKc, pbOtsPubKeyCandidate, cbHashOutput); + +cleanup: + return scError; +} + +static +VOID +SYMCRYPT_CALL +LmsComputeRootCandidate( + UINT32 nLeafNumber, + _In_ PCSYMCRYPT_LMS_PARAMS pParams, + _In_reads_bytes_(SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE) PCBYTE pbId, + _In_reads_bytes_(pParams->nTreeHeight * pParams->cbHashOutput) PCBYTE pbPath, + _In_reads_bytes_(pParams->cbHashOutput) PCBYTE pbPubKeyCandidate, + _Out_writes_bytes_(pParams->cbHashOutput) PBYTE pbRootCandidate +) +{ + PCSYMCRYPT_HASH pHash = pParams->pLmsHashFunction; + SYMCRYPT_HASH_STATE state = { 0 }; + UINT32 cbHashOutput = pParams->cbHashOutput; + UINT32 nIndex = 0; + UINT32 nNodeNum = (1 << pParams->nTreeHeight) + nLeafNumber; + PBYTE pbTemp = pbRootCandidate; + BYTE en32NodeNum[sizeof(UINT32)] = { 0 }; + + SYMCRYPT_STORE_MSBFIRST32(en32NodeNum, nNodeNum); + SymCryptHashInit(pHash, &state); + SymCryptHashAppend(pHash, &state, pbId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &state, en32NodeNum, sizeof(UINT32)); + 
SymCryptHashAppend(pHash, &state, SYMCRYPT_LMS_D_LEAF, sizeof(SYMCRYPT_LMS_D_LEAF)); + SymCryptHashAppend(pHash, &state, pbPubKeyCandidate, cbHashOutput); + SymCryptHashResult(pHash, &state, pbTemp, cbHashOutput); + + for (nIndex = 0; nIndex < pParams->nTreeHeight; nIndex ++) + { + SYMCRYPT_STORE_MSBFIRST32(en32NodeNum, nNodeNum / 2); + SymCryptHashAppend(pHash, &state, pbId, SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE); + SymCryptHashAppend(pHash, &state, en32NodeNum, sizeof(UINT32)); + SymCryptHashAppend(pHash, &state, SYMCRYPT_LMS_D_INTR, sizeof(SYMCRYPT_LMS_D_INTR)); + if (nNodeNum % 2) + { + SymCryptHashAppend(pHash, &state, pbPath + (cbHashOutput * nIndex), cbHashOutput); + SymCryptHashAppend(pHash, &state, pbTemp, cbHashOutput); + } + else + { + SymCryptHashAppend(pHash, &state, pbTemp, cbHashOutput); + SymCryptHashAppend(pHash, &state, pbPath + (cbHashOutput * nIndex), cbHashOutput); + } + SymCryptHashResult(pHash, &state, pbTemp, cbHashOutput); + nNodeNum /= 2; + } + SYMCRYPT_ASSERT(nNodeNum <= 1); + + memcpy(pbRootCandidate, pbTemp, cbHashOutput); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsVerifyInternal( + _In_ PCSYMCRYPT_LMS_KEY pKey, + _In_reads_bytes_(cbMessage) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _In_reads_bytes_(cbSignature) PCBYTE pbSignature, + SIZE_T cbSignature) +{ + SYMCRYPT_ASSERT(pKey != NULL); + SYMCRYPT_ASSERT(pKey->keyType != SYMCRYPT_LMSKEY_TYPE_NONE); + SYMCRYPT_CHECK_MAGIC(pKey); + + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 cbHashOutput = pKey->params.cbHashOutput; + PCSYMCRYPT_LMS_PARAMS pLmsKeyParams = &pKey->params; + PCBYTE pbLocSignature = pbSignature; + BYTE abRootCandidate[SYMCRYPT_LMS_MAX_N] = { 0 }; + BYTE abOtsPubKeyCandidate[SYMCRYPT_LMS_MAX_N] = { 0 }; + + if (flags != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (cbSignature != SymCryptLmsSizeofSignatureFromParams(&pKey->params)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + UINT32 nLeafNumber = 
SYMCRYPT_LOAD_MSBFIRST32(pbLocSignature); + pbLocSignature += sizeof(UINT32); + if (nLeafNumber >= ((UINT32)1 << pKey->params.nTreeHeight)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + UINT32 nOtsSigtype = SYMCRYPT_LOAD_MSBFIRST32(pbLocSignature); + pbLocSignature += sizeof(UINT32); + + if (nOtsSigtype != pLmsKeyParams->lmsOtsAlgID) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pbLocSignature += cbHashOutput * (pKey->params.nByteStringCount + 1); // +1 is for the randomizer + UINT32 nSigType = SYMCRYPT_LOAD_MSBFIRST32(pbLocSignature); + pbLocSignature += sizeof(UINT32); + + if (nSigType != pLmsKeyParams->lmsAlgID) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + scError = LmsComputeOtsPubKeyCandidate( + nLeafNumber, + pbMessage, + cbMessage, + pbSignature + sizeof(UINT32), //the +sizeof(UINT32) is to skip the leaf number and reach the LMS-OTS signature + LmsOtsSizeofSignatureFromParams(&pKey->params), + pKey->abId, + pLmsKeyParams, + abOtsPubKeyCandidate); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + LmsComputeRootCandidate( + nLeafNumber, + pLmsKeyParams, + pKey->abId, + pbLocSignature, + abOtsPubKeyCandidate, + abRootCandidate); + if (!SymCryptEqual(abRootCandidate, pKey->abPublicRoot, cbHashOutput)) + { + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + goto cleanup; + } + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptLmsVerify( + _In_ PCSYMCRYPT_LMS_KEY pKey, + _In_reads_bytes_(cbMessage) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _In_reads_bytes_(cbSignature) PCBYTE pbSignature, + SIZE_T cbSignature) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptLmsSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_LMS); + + scError = SymCryptLmsVerifyInternal( + pKey, + pbMessage, + cbMessage, + flags, + pbSignature, + cbSignature); + + return scError; +} diff --git a/libs/symcrypt/lib/marvin32.c 
b/libs/symcrypt/lib/marvin32.c new file mode 100644 index 00000000000..9e92633bebc --- /dev/null +++ b/libs/symcrypt/lib/marvin32.c @@ -0,0 +1,331 @@ +// +// Marvin32.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This module contains the routines to implement the Marvin32 checksum function +// +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. +// + + +// +// Default initial seed, first 8 bytes of SHA256( "Marvin32" ); +// +static const SYMCRYPT_MARVIN32_EXPANDED_SEED SymCryptMarvin32DefaultSeedStruct = { + {0xcd0893b7, 0xd53cd9ce}, +#if defined( SYMCRYPT_MAGIC_ENABLED ) + SYMCRYPT_MAGIC_VALUE( &SymCryptMarvin32DefaultSeedStruct ), +#endif + }; + +PCSYMCRYPT_MARVIN32_EXPANDED_SEED const SymCryptMarvin32DefaultSeed = &SymCryptMarvin32DefaultSeedStruct; + +// +// Round rotation amounts. This array is optimized away by the compiler +// as we inline all our rotations. +// +static const int rotate[4] = { + 20, 9, 27, 19, +}; + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMarvin32ExpandSeed( + _Out_ PSYMCRYPT_MARVIN32_EXPANDED_SEED pExpandedSeed, + _In_reads_(cbSeed) PCBYTE pbSeed, + SIZE_T cbSeed ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + if( cbSeed != SYMCRYPT_MARVIN32_SEED_SIZE ) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + pExpandedSeed->s[0] = SYMCRYPT_LOAD_LSBFIRST32( pbSeed ); + pExpandedSeed->s[1] = SYMCRYPT_LOAD_LSBFIRST32( pbSeed + 4 ); + + SYMCRYPT_SET_MAGIC( pExpandedSeed ); + +cleanup: + return scError; +} + +VOID +SYMCRYPT_CALL +SymCryptMarvin32SeedCopy( _In_ PCSYMCRYPT_MARVIN32_EXPANDED_SEED pSrc, + _Out_ PSYMCRYPT_MARVIN32_EXPANDED_SEED pDst ) +{ + SYMCRYPT_CHECK_MAGIC( pSrc ); + *pDst = *pSrc; + SYMCRYPT_SET_MAGIC( pDst ); +} + + +VOID +SYMCRYPT_CALL +SymCryptMarvin32StateCopy( + _In_ PCSYMCRYPT_MARVIN32_STATE pSrc, + _In_opt_ PCSYMCRYPT_MARVIN32_EXPANDED_SEED pExpandedSeed, + _Out_ PSYMCRYPT_MARVIN32_STATE pDst ) +{ + 
SYMCRYPT_CHECK_MAGIC( pSrc ); + *pDst = *pSrc; + + if( pExpandedSeed == NULL ) + { + SYMCRYPT_CHECK_MAGIC( pSrc->pSeed ); + pDst->pSeed = pSrc->pSeed; + } + else + { + SYMCRYPT_CHECK_MAGIC( pExpandedSeed ); + pDst->pSeed = pExpandedSeed; + } + + SYMCRYPT_SET_MAGIC( pDst ); +} + + +VOID +SYMCRYPT_CALL +SymCryptMarvin32Init( _Out_ PSYMCRYPT_MARVIN32_STATE pState, + _In_ PCSYMCRYPT_MARVIN32_EXPANDED_SEED pExpandedSeed) +{ + pState->chain = *pExpandedSeed; + pState->dataLength = 0; + pState->pSeed = pExpandedSeed; + + *(UINT32 *) &pState->buffer[4] = 0; // wipe the last 4 bytes of the buffer. + + SYMCRYPT_SET_MAGIC( pState ); +} + + +// +// SymCryptMarvin32Append +// + +VOID +SYMCRYPT_CALL +SymCryptMarvin32Append( _Inout_ PSYMCRYPT_MARVIN32_STATE state, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + UINT32 bytesInBuffer = state->dataLength; + + SYMCRYPT_CHECK_MAGIC( state ); + + state->dataLength += (UINT32) cbData; // We only keep track of the last 2 bits... + +#define ALG MARVIN32 +#define Alg Marvin32 +#include "hash_buffer_pattern.c" +#undef ALG +#undef Alg + +} + + +// +// SymCryptMarvin32Result +// +VOID +SYMCRYPT_CALL +SymCryptMarvin32Result( + _Inout_ PSYMCRYPT_MARVIN32_STATE pState, + _Out_writes_( SYMCRYPT_MARVIN32_RESULT_SIZE ) PBYTE pbResult ) +{ + SIZE_T bytesInBuffer = ( pState->dataLength) & 0x3; + + SYMCRYPT_CHECK_MAGIC( pState ); + + // + // Wipe four bytes in the buffer. + // Doing this first ensures that this write is aligned when the input was of + // length 0 mod 4. + // The buffer is 8 bytes long, so we never overwrite anything else. + // + *(UINT32 *) &pState->buffer[bytesInBuffer] = 0; + + // + // The buffer is never completely full, so we can always put the first + // padding byte in. 
+ // + pState->buffer[bytesInBuffer++] = 0x80; + + // + // Process the final block + // + SymCryptMarvin32AppendBlocks( &pState->chain, pState->buffer, 8 ); + + SYMCRYPT_STORE_LSBFIRST32( pbResult , pState->chain.s[0] ); + SYMCRYPT_STORE_LSBFIRST32( pbResult + 4, pState->chain.s[1] ); + + // + // Wipe only those things that we need to wipe. + // + + *(UINT32 *) &pState->buffer[0] = 0; + pState->dataLength = 0; + + pState->chain = *pState->pSeed; +} + +#define BLOCK( a, b ) \ +{\ + b ^= a; a = ROL32( a, rotate[0] );\ + a += b; b = ROL32( b, rotate[1] );\ + b ^= a; a = ROL32( a, rotate[2] );\ + a += b; b = ROL32( b, rotate[3] );\ +} + +VOID +SYMCRYPT_CALL +SymCryptMarvin32AppendBlocks( + _Inout_ PSYMCRYPT_MARVIN32_CHAINING_STATE pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + UINT32 s0 = pChain->s[0]; + UINT32 s1 = pChain->s[1]; + + SIZE_T bytesInFirstBlock = cbData & 0xc; // 0, 4, 8, or 12 + + SYMCRYPT_ASSERT( (cbData & 3) == 0 ); + + + pbData += bytesInFirstBlock; + cbData -= bytesInFirstBlock; + + switch( bytesInFirstBlock ) + { + case 0: // This handles the cbData == 0 case too + while( cbData > 0 ) + { + pbData += 16; + cbData -= 16; + + s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData - 16 ); + BLOCK( s0, s1 ); + case 12: + s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData - 12 ); + BLOCK( s0, s1 ); + case 8: + s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData - 8 ); + BLOCK( s0, s1 ); + case 4: + s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData - 4 ); + BLOCK( s0, s1 ); + } + } + + pChain->s[0] = s0; + pChain->s[1] = s1; +} + + +VOID +SYMCRYPT_CALL +SymCryptMarvin32( + _In_ PCSYMCRYPT_MARVIN32_EXPANDED_SEED pExpandedSeed, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_MARVIN32_RESULT_SIZE ) PBYTE pbResult ) +// +// To reduce the per-computation overhead, we have a dedicated code here instead of the whole Init/Append/Result stuff. 
+// +{ + UINT32 tmp; + + UINT32 s0 = pExpandedSeed->s[0]; + UINT32 s1 = pExpandedSeed->s[1]; + + while( cbData > 7 ) + { + s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData ); + BLOCK( s0, s1 ); + s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData + 4 ); + BLOCK( s0, s1 ); + pbData += 8; + cbData -= 8; + } + + /* + switch( cbData ) + { + case 3: + buf[2] = pbData[2]; + case 2: + *(UINT16 *) &buf[0] = *(UINT16 *) pbData; + break; + case 1: + buf[0] = pbData[0]; + case 0: + ; + } + + buf[ cbData ] = 0x80; + + s0 += LOAD_LSBFIRST32( buf ); + */ + + + switch( cbData ) + { + default: + case 4: s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData ); BLOCK( s0, s1 ); pbData += 4; + case 0: tmp = 0x80; break; + + case 5: s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData ); BLOCK( s0, s1 ); pbData += 4; + case 1: tmp = 0x8000 | pbData[0]; break; + + case 6: s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData ); BLOCK( s0, s1 ); pbData += 4; + case 2: tmp = 0x800000 | SYMCRYPT_LOAD_LSBFIRST16( pbData ); break; + + case 7: s0 += SYMCRYPT_LOAD_LSBFIRST32( pbData ); BLOCK( s0, s1 ); pbData += 4; + case 3: tmp = SYMCRYPT_LOAD_LSBFIRST16( pbData ) | (pbData[2] << 16) | 0x80000000; break; + } + s0 += tmp; + + + BLOCK( s0, s1 ); + BLOCK( s0, s1 ); + + SYMCRYPT_STORE_LSBFIRST32( pbResult , s0 ); + SYMCRYPT_STORE_LSBFIRST32( pbResult + 4, s1 ); +} + + + +// +// Simple test vector +// + +static const BYTE marvin32KATAnswer[ 8 ] = { + 0xbf, 0x69, 0x27, 0x49, 0x39, 0x43, 0xc7, 0x22, +} ; + +VOID +SYMCRYPT_CALL +SymCryptMarvin32Selftest(void) +{ + BYTE res[SYMCRYPT_MARVIN32_RESULT_SIZE]; + + SymCryptMarvin32( SymCryptMarvin32DefaultSeed, SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), res ); + + SymCryptInjectError( res, sizeof( res ) ); + if( memcmp( res, marvin32KATAnswer, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'marv' ); + } +} diff --git a/libs/symcrypt/lib/md2.c b/libs/symcrypt/lib/md2.c new file mode 100644 index 00000000000..b754df333ce --- /dev/null +++ b/libs/symcrypt/lib/md2.c @@ -0,0 +1,307 @@ +// +// Md2.c +// +// Copyright (c) 
Microsoft Corporation. Licensed under the MIT license. +// +// +// This module contains the routines to implement MD2 from RFC 1319 +// +// This is a new implementation, NOT based on the existing one in RSA32.lib, +// which is the one from RSA data security. +// +// The implementation had to be refreshed anyway to conform to our coding +// guidelines for cryptographic functions. +// Re-implementing the function along the lines of our SHA-family implementations +// was easy, and it removes a file with RSA copyright from our system. +// +// The only data copied for this implementation is the S table from the +// RFC. +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. +// + +const SYMCRYPT_HASH SymCryptMd2Algorithm_default = { + &SymCryptMd2Init, + &SymCryptMd2Append, + &SymCryptMd2Result, + &SymCryptMd2AppendBlocks, + &SymCryptMd2StateCopy, + sizeof( SYMCRYPT_MD2_STATE ), + SYMCRYPT_MD2_RESULT_SIZE, + SYMCRYPT_MD2_INPUT_BLOCK_SIZE, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MD2_STATE, chain ), + SYMCRYPT_FIELD_SIZE( SYMCRYPT_MD2_STATE, chain ), +}; + +const PCSYMCRYPT_HASH SymCryptMd2Algorithm = &SymCryptMd2Algorithm_default; + +// +// These entries are called S[i] in RFC1319 +// +const BYTE SymCryptMd2STable[256] = { + 41, 46, 67, 201, 162, 216, 124, 1, 61, 54, 84, 161, 236, 240, 6, + 19, 98, 167, 5, 243, 192, 199, 115, 140, 152, 147, 43, 217, 188, + 76, 130, 202, 30, 155, 87, 60, 253, 212, 224, 22, 103, 66, 111, 24, + 138, 23, 229, 18, 190, 78, 196, 214, 218, 158, 222, 73, 160, 251, + 245, 142, 187, 47, 238, 122, 169, 104, 121, 145, 21, 178, 7, 63, + 148, 194, 16, 137, 11, 34, 95, 33, 128, 127, 93, 154, 90, 144, 50, + 39, 53, 62, 204, 231, 191, 247, 151, 3, 255, 25, 48, 179, 72, 165, + 181, 209, 215, 94, 146, 42, 172, 86, 170, 198, 79, 184, 56, 210, + 150, 164, 125, 182, 118, 252, 107, 226, 156, 116, 4, 241, 69, 157, + 112, 89, 100, 113, 135, 32, 134, 91, 207, 101, 230, 45, 168, 2, 27, + 96, 37, 173, 174, 176, 
185, 246, 28, 70, 97, 105, 52, 64, 126, 15, + 85, 71, 163, 35, 221, 81, 175, 58, 195, 92, 249, 206, 186, 197, + 234, 38, 44, 83, 13, 110, 133, 40, 132, 9, 211, 223, 205, 244, 65, + 129, 77, 82, 106, 220, 55, 200, 108, 193, 171, 250, 36, 225, 123, + 8, 12, 189, 177, 74, 120, 136, 149, 139, 227, 99, 232, 109, 233, + 203, 213, 254, 59, 0, 29, 57, 242, 239, 183, 14, 102, 88, 208, 228, + 166, 119, 114, 248, 235, 117, 75, 10, 49, 68, 80, 180, 143, 237, + 31, 26, 219, 153, 141, 51, 159, 17, 131, 20 +}; + + +// +// SymCryptMd2 +// +#define ALG MD2 +#define Alg Md2 +#include "hash_pattern.c" +#undef ALG +#undef Alg + + +// +// SymCryptMd2Init +// +VOID +SYMCRYPT_CALL +SymCryptMd2Init( _Out_ PSYMCRYPT_MD2_STATE pState ) +{ + // + // We use the secure wipe as the init routine is also used to re-initialize + // (and wipe) the state after a hash computation. + // In that case the compiler might conclude that this wipe can be optimized + // away, and that would leak data. + // + SymCryptWipeKnownSize( pState, sizeof( *pState ) ); + SYMCRYPT_SET_MAGIC( pState ); +} + + +// +// SymCryptMd2Append +// +VOID +SYMCRYPT_CALL +SymCryptMd2Append( _Inout_ PSYMCRYPT_MD2_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SymCryptHashAppendInternal( SymCryptMd2Algorithm, (PSYMCRYPT_COMMON_HASH_STATE)pState, pbData, cbData ); +} + + +// +// SymCryptMd2Result +// +VOID +SYMCRYPT_CALL +SymCryptMd2Result( _Inout_ PSYMCRYPT_MD2_STATE state, + _Out_writes_( SYMCRYPT_MD2_RESULT_SIZE ) PBYTE pbResult ) +{ + // + // The buffer is never completely full, so it is easy to compute the actual padding. 
+ // + SIZE_T tmp; + SIZE_T paddingBytes = 16 - state->bytesInBuffer; + + + SYMCRYPT_CHECK_MAGIC( state ); + + memset( &state->buffer[state->bytesInBuffer], (BYTE)paddingBytes, paddingBytes ); + + SymCryptMd2AppendBlocks( &state->chain, state->buffer, SYMCRYPT_MD2_INPUT_BLOCK_SIZE, &tmp ); + + // + // Append the checksum + // + SymCryptMd2AppendBlocks( &state->chain, state->chain.C, SYMCRYPT_MD2_INPUT_BLOCK_SIZE, &tmp ); + + memcpy( pbResult, &state->chain.X[0], SYMCRYPT_MD2_RESULT_SIZE ); + + // + // Wipe & re-initialize + // + // (Our init code wipes the buffer too, so we don't have to.) + // + SymCryptMd2Init( state ); +} + + +VOID +SYMCRYPT_CALL +SymCryptMd2AppendBlocks( + _Inout_ PSYMCRYPT_MD2_CHAINING_STATE pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ) +{ + // + // For variable names see RFC 1319. + // + unsigned int t; + int j,k; + + while( cbData >= SYMCRYPT_MD2_INPUT_BLOCK_SIZE ) + { + BYTE L; + // + // read the data once into our structure + // + memcpy( &pChain->X[16], pbData, SYMCRYPT_MD2_INPUT_BLOCK_SIZE ); + + // + // Update the checksum block. 
+ // The L value at the end of the previous block is in the last byte of the checksum + // + L = pChain->C[15]; + + for( j=0; j<16; j++ ) + { + pChain->C[j] = L = pChain->C[j] ^ SymCryptMd2STable[ L ^ pChain->X[16+j] ]; + } + + // + // Now we compute the actual hash + // + SymCryptXorBytes( &pChain->X[0], &pChain->X[16], &pChain->X[32], 16 ); + + t = 0; + for( j=0; j<18; j++ ) + { + for( k=0; k<48; k++ ) + { + t = pChain->X[k] ^ SymCryptMd2STable[t]; + pChain->X[k] = (BYTE) t; + } + t = (t + j)& 0xff; + } + + pbData += SYMCRYPT_MD2_INPUT_BLOCK_SIZE; + cbData -= SYMCRYPT_MD2_INPUT_BLOCK_SIZE; + } + + *pcbRemaining = cbData; +} + + +VOID +SYMCRYPT_CALL +SymCryptMd2StateExport( + _In_ PCSYMCRYPT_MD2_STATE pState, + _Out_writes_bytes_( SYMCRYPT_MD2_STATE_EXPORT_SIZE ) PBYTE pbBlob ) +{ + SYMCRYPT_ALIGN SYMCRYPT_MD2_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. + C_ASSERT( sizeof( blob ) == SYMCRYPT_MD2_STATE_EXPORT_SIZE ); + + SYMCRYPT_CHECK_MAGIC( pState ); + + SymCryptWipeKnownSize( &blob, sizeof( blob ) ); // wipe to avoid any data leakage + + blob.header.magic = SYMCRYPT_BLOB_MAGIC; + blob.header.size = SYMCRYPT_MD2_STATE_EXPORT_SIZE; + blob.header.type = SymCryptBlobTypeMd2State; + + // + // Copy the relevant data. Buffer will be 0-padded. 
+ // + memcpy( &blob.C[0], &pState->chain.C[0], 16 ); + memcpy( &blob.X[0], &pState->chain.X[0], 16 ); + blob.bytesInBuffer = (UINT32) pState->bytesInBuffer; + memcpy( &blob.buffer[0], &pState->buffer[0], blob.bytesInBuffer ); + + SYMCRYPT_ASSERT( (PCBYTE) &blob + sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ) == (PCBYTE) &blob.trailer ); + SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), &blob.trailer.checksum[0] ); + + memcpy( pbBlob, &blob, sizeof( blob ) ); + +//cleanup: + SymCryptWipeKnownSize( &blob, sizeof( blob ) ); + return; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMd2StateImport( + _Out_ PSYMCRYPT_MD2_STATE pState, + _In_reads_bytes_( SYMCRYPT_MD2_STATE_EXPORT_SIZE) PCBYTE pbBlob ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_ALIGN SYMCRYPT_MD2_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. + BYTE checksum[8]; + + C_ASSERT( sizeof( blob ) == SYMCRYPT_MD2_STATE_EXPORT_SIZE ); + memcpy( &blob, pbBlob, sizeof( blob ) ); + + if( blob.header.magic != SYMCRYPT_BLOB_MAGIC || + blob.header.size != SYMCRYPT_MD2_STATE_EXPORT_SIZE || + blob.header.type != SymCryptBlobTypeMd2State ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), checksum ); + if( memcmp( checksum, &blob.trailer.checksum[0], 8 ) != 0 ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + memcpy( &pState->chain.C[0], &blob.C[0], 16 ); + memcpy( &pState->chain.X[0], &blob.X[0], 16 ); + memcpy( &pState->buffer[0], &blob.buffer[0], 16 ); + pState->bytesInBuffer = blob.bytesInBuffer; + + pState->dataLengthL = blob.bytesInBuffer; + pState->dataLengthH = 1; + + SYMCRYPT_SET_MAGIC( pState ); + +cleanup: + SymCryptWipeKnownSize( &blob, sizeof(blob) ); + return scError; +} + + + + + + +// +// Simple test vector for FIPS module testing +// + +static const BYTE md2KATAnswer[ 
16 ] = { + 0xda, 0x85, 0x3b, 0x0d, 0x3f, 0x88, 0xd9, 0x9b, + 0x30, 0x28, 0x3a, 0x69, 0xe6, 0xde, 0xd6, 0xbb, +} ; + +VOID +SYMCRYPT_CALL +SymCryptMd2Selftest(void) +{ + BYTE result[SYMCRYPT_MD2_RESULT_SIZE]; + + SymCryptMd2( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result ); + + SymCryptInjectError( result, sizeof( result ) ); + + if( memcmp( result, md2KATAnswer, sizeof( result ) ) != 0 ) { + SymCryptFatal( 'MD2t' ); + } +} diff --git a/libs/symcrypt/lib/md4.c b/libs/symcrypt/lib/md4.c new file mode 100644 index 00000000000..759e317c3a8 --- /dev/null +++ b/libs/symcrypt/lib/md4.c @@ -0,0 +1,425 @@ +// +// Md4.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This module contains the routines to implement MD4 from RFC 1320 +// +// +// This is a new implementation, NOT based on the existing ones in RSA32.lib. +// There are 2 versions in RSA32.lib, one from RSA data security and one from +// Scott Fields. +// +// MD4 and MD5 are extremely similar. Having already done a new MD5 implementation it +// was very little work to copy the code & turn it into an MD4 implementation. +// In fact, it was easier than reviewing & modifying the old code to bring it up to +// the current implementation guidelines. +// +// This also ensures that this file is not a derived work from RSA data security +// code which simplifies the copyright situation. +// +// We dropped the assembler implementation. MD4 is so weak that it should be removed +// from use, not sped up. +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. 
+// + +const SYMCRYPT_HASH SymCryptMd4Algorithm_default = { + &SymCryptMd4Init, + &SymCryptMd4Append, + &SymCryptMd4Result, + &SymCryptMd4AppendBlocks, + &SymCryptMd4StateCopy, + sizeof( SYMCRYPT_MD4_STATE ), + SYMCRYPT_MD4_RESULT_SIZE, + SYMCRYPT_MD4_INPUT_BLOCK_SIZE, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MD4_STATE, chain ), + SYMCRYPT_FIELD_SIZE( SYMCRYPT_MD4_STATE, chain ), +}; + +const PCSYMCRYPT_HASH SymCryptMd4Algorithm = &SymCryptMd4Algorithm_default; + +// +// The round constants used by MD4 +// +// +static const UINT32 md4Const[3] = { + 0x00000000UL, + 0x5A827999UL, + 0x6ED9EBA1UL, + }; + +// +// Round rotation amounts. This array is optimized away by the compiler +// as we inline all our rotations. +// +static const int md4Rotate[48] = { + 3, 7, 11, 19, + 3, 7, 11, 19, + 3, 7, 11, 19, + 3, 7, 11, 19, + + 3, 5, 9, 13, + 3, 5, 9, 13, + 3, 5, 9, 13, + 3, 5, 9, 13, + + 3, 9, 11, 15, + 3, 9, 11, 15, + 3, 9, 11, 15, + 3, 9, 11, 15, + +}; + +// +// Message word index table. This array is optimized away by the compiler +// as we inline all our accesses. +// +static const int md4MsgIndex[48] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, + 0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15, +}; + +// +// Initial state +// +static const UINT32 md4InitialState[4] = { + 0x67452301UL, + 0xefcdab89UL, + 0x98badcfeUL, + 0x10325476UL, +}; + + +// +// SymCryptMd4 +// +#define ALG MD4 +#define Alg Md4 +#include "hash_pattern.c" +#undef ALG +#undef Alg + + + +// +// SymCryptmd4Init +// +VOID +SYMCRYPT_CALL +SymCryptMd4Init( _Out_ PSYMCRYPT_MD4_STATE pState ) +{ + SYMCRYPT_SET_MAGIC( pState ); + + pState->dataLengthL = 0; + pState->dataLengthH = 0; + pState->bytesInBuffer = 0; + + memcpy( &pState->chain.H[0], &md4InitialState[0], sizeof( md4InitialState ) ); + + // + // There is no need to initialize the buffer part of the state as that will be + // filled before it is used. 
+ // +} + + +// +// SymCryptMd4Append +// +VOID +SYMCRYPT_CALL +SymCryptMd4Append( _Inout_ PSYMCRYPT_MD4_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SymCryptHashAppendInternal( SymCryptMd4Algorithm, (PSYMCRYPT_COMMON_HASH_STATE)pState, pbData, cbData ); +} + + +// +// SymCryptmd4Result +// +VOID +SYMCRYPT_CALL +SymCryptMd4Result( + _Inout_ PSYMCRYPT_MD4_STATE pState, + _Out_writes_( SYMCRYPT_MD4_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHashCommonPaddingMd4Style( SymCryptMd4Algorithm, (PSYMCRYPT_COMMON_HASH_STATE)pState ); + + // + // Write the output in the correct byte order + // + SymCryptUint32ToLsbFirst( &pState->chain.H[0], pbResult, 4 ); + + // + // Wipe & re-initialize + // We have to wipe the whole state as the initialization might be optimized away. + // + SymCryptWipeKnownSize( pState, sizeof( *pState )); + SymCryptMd4Init( pState ); + } + + +// +// For documentation on these function see rfc-1320 +// +//#define F( x, y, z ) (((x) & (y)) | ((~(x)) & (z))) +//#define G( x, y, z ) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) + +#define F( x, y, z ) ((((z) ^ (y)) & (x)) ^ (z)) +#define G( x, y, z ) ((((x) | (y)) & (z) ) | ((x) & (y))) +#define H( x, y, z ) ((x) ^ (y) ^ (z) ) + +// +// The values a-d are stored in an array called ad. +// We have unrolled the code completely. This makes both the indices into +// the ad array constant, and it makes the message addressing constant. +// +// We copy the message into our own buffer to obey the read-once rule. +// Memory is sometimes aliased so that multiple threads or processes can access +// the same memory at the same time. With MD4 there is a danger that some other +// process could modify the memory while the computation is ongoing and introduce +// changes in the computation not envisioned by the designers or cryptanalists. 
+// At this level in the library we cannot guarantee that this is not the case, +// and we can't trust the higher layers to respect a don't-change-it-while-computing-md4 +// restriction. (In practice, such restrictions are lost through the many +// layers in the stack.) +// + +// +// r is the round number +// ad[(r+0)%4] = a; +// ad[(r+1)%4] = d; +// ad[(r+2)%4] = c; +// ad[(r+3)%4] = b; +// +// When r increments the register re-naming is automatically correct. +// + +// +// CROUND is the core round function +// +#define CROUND( r, Func ) { \ + ad[r%4] = ROL32( ad[r%4] + Func(ad[(r+3)%4], ad[(r+2)%4], ad[(r+1)%4]) + Wt + md4Const[r/16], md4Rotate[r] ); \ +} + +// +// IROUND is the initial round that loads the message and copies it into our buffer. +// +#define IROUND( r, Func ) { \ + Wt = SYMCRYPT_LOAD_LSBFIRST32( &pbData[ 4*md4MsgIndex[r] ] ); \ + W[r] = Wt; \ + CROUND( r, Func ); \ +} + +// +// FROUND are the subsequent rounds. +// +#define FROUND( r, Func ) { \ + Wt = W[md4MsgIndex[r]];\ + CROUND( r, Func ); \ +} + +VOID +SYMCRYPT_CALL +SymCryptMd4AppendBlocks( + _Inout_ PSYMCRYPT_MD4_CHAINING_STATE pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ) +{ + + SYMCRYPT_ALIGN UINT32 W[16]; + SYMCRYPT_ALIGN UINT32 ad[4]; + UINT32 Wt; + + ad[0] = pChain->H[0]; + ad[1] = pChain->H[3]; + ad[2] = pChain->H[2]; + ad[3] = pChain->H[1]; + + while( cbData >= 64 ) + { + // + // initial rounds 1 to 16 + // + + IROUND( 0, F ); + IROUND( 1, F ); + IROUND( 2, F ); + IROUND( 3, F ); + IROUND( 4, F ); + IROUND( 5, F ); + IROUND( 6, F ); + IROUND( 7, F ); + IROUND( 8, F ); + IROUND( 9, F ); + IROUND( 10, F ); + IROUND( 11, F ); + IROUND( 12, F ); + IROUND( 13, F ); + IROUND( 14, F ); + IROUND( 15, F ); + + FROUND( 16, G ); + FROUND( 17, G ); + FROUND( 18, G ); + FROUND( 19, G ); + FROUND( 20, G ); + FROUND( 21, G ); + FROUND( 22, G ); + FROUND( 23, G ); + FROUND( 24, G ); + FROUND( 25, G ); + FROUND( 26, G ); + FROUND( 27, G ); + FROUND( 
28, G ); + FROUND( 29, G ); + FROUND( 30, G ); + FROUND( 31, G ); + + FROUND( 32, H ); + FROUND( 33, H ); + FROUND( 34, H ); + FROUND( 35, H ); + FROUND( 36, H ); + FROUND( 37, H ); + FROUND( 38, H ); + FROUND( 39, H ); + FROUND( 40, H ); + FROUND( 41, H ); + FROUND( 42, H ); + FROUND( 43, H ); + FROUND( 44, H ); + FROUND( 45, H ); + FROUND( 46, H ); + FROUND( 47, H ); + + pChain->H[0] = ad[0] = ad[0] + pChain->H[0]; + pChain->H[3] = ad[1] = ad[1] + pChain->H[3]; + pChain->H[2] = ad[2] = ad[2] + pChain->H[2]; + pChain->H[1] = ad[3] = ad[3] + pChain->H[1]; + + pbData += 64; + cbData -= 64; + } + + *pcbRemaining = cbData; + // + // Wipe the variables; + // + SymCryptWipeKnownSize( ad, sizeof( ad ) ); + SymCryptWipeKnownSize( W, sizeof( W ) ); + SYMCRYPT_FORCE_WRITE32( &Wt, 0 ); +} + +VOID +SYMCRYPT_CALL +SymCryptMd4StateExport( + _In_ PCSYMCRYPT_MD4_STATE pState, + _Out_writes_bytes_( SYMCRYPT_MD4_STATE_EXPORT_SIZE ) PBYTE pbBlob ) +{ + SYMCRYPT_ALIGN SYMCRYPT_MD4_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. + C_ASSERT( sizeof( blob ) == SYMCRYPT_MD4_STATE_EXPORT_SIZE ); + + SYMCRYPT_CHECK_MAGIC( pState ); + + SymCryptWipeKnownSize( &blob, sizeof( blob ) ); // wipe to avoid any data leakage + + blob.header.magic = SYMCRYPT_BLOB_MAGIC; + blob.header.size = SYMCRYPT_MD4_STATE_EXPORT_SIZE; + blob.header.type = SymCryptBlobTypeMd4State; + + // + // Copy the relevant data. Buffer will be 0-padded. 
+ // + + SymCryptUint32ToLsbFirst( &pState->chain.H[0], &blob.chain[0], 4 ); + blob.dataLength = pState->dataLengthL; + memcpy( &blob.buffer[0], &pState->buffer[0], blob.dataLength & 0x3f ); + + SYMCRYPT_ASSERT( (PCBYTE) &blob + sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ) == (PCBYTE) &blob.trailer ); + SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), &blob.trailer.checksum[0] ); + + memcpy( pbBlob, &blob, sizeof( blob ) ); + +//cleanup: + SymCryptWipeKnownSize( &blob, sizeof( blob ) ); + return; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMd4StateImport( + _Out_ PSYMCRYPT_MD4_STATE pState, + _In_reads_bytes_( SYMCRYPT_MD4_STATE_EXPORT_SIZE) PCBYTE pbBlob ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_ALIGN SYMCRYPT_MD4_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. + BYTE checksum[8]; + + C_ASSERT( sizeof( blob ) == SYMCRYPT_MD4_STATE_EXPORT_SIZE ); + memcpy( &blob, pbBlob, sizeof( blob ) ); + + if( blob.header.magic != SYMCRYPT_BLOB_MAGIC || + blob.header.size != SYMCRYPT_MD4_STATE_EXPORT_SIZE || + blob.header.type != SymCryptBlobTypeMd4State ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), checksum ); + if( memcmp( checksum, &blob.trailer.checksum[0], 8 ) != 0 ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + SymCryptLsbFirstToUint32( &blob.chain[0], &pState->chain.H[0], 4 ); + pState->dataLengthL = blob.dataLength; + pState->dataLengthH = 0; + pState->bytesInBuffer = blob.dataLength & 0x3f; + memcpy( &pState->buffer[0], &blob.buffer[0], pState->bytesInBuffer ); + + SYMCRYPT_SET_MAGIC( pState ); + +cleanup: + SymCryptWipeKnownSize( &blob, sizeof(blob) ); + return scError; +} + + + +// +// Simple test vector for FIPS module testing +// + +static const BYTE md4KATAnswer[ 16 ] = { + 0xa4, 0x48, 0x01, 0x7a, 0xaf, 0x21, 0xd8, 
0x52, + 0x5f, 0xc1, 0x0a, 0xe8, 0x7a, 0xa6, 0x72, 0x9d, +} ; + +VOID +SYMCRYPT_CALL +SymCryptMd4Selftest(void) +{ + BYTE result[SYMCRYPT_MD4_RESULT_SIZE]; + + SymCryptMd4( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result ); + + SymCryptInjectError( result, sizeof( result ) ); + + if( memcmp( result, md4KATAnswer, sizeof( result ) ) != 0 ) { + SymCryptFatal( 'MD4t' ); + } +} diff --git a/libs/symcrypt/lib/md5.c b/libs/symcrypt/lib/md5.c new file mode 100644 index 00000000000..c5de50f2646 --- /dev/null +++ b/libs/symcrypt/lib/md5.c @@ -0,0 +1,503 @@ +// +// Md5.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This module contains the routines to implement MD5 from RFC 1321 +// +// +// This is a new implementation, NOT based on the existing one in RSA32.lib, +// which is the one from RSA data security. RFC-1321 also contains code that +// at a glance looks very similar to the RSA32.lib code. +// +// The implementation had to be refreshed anyway to conform to our coding +// guidelines for cryptographic functions. +// Re-implementing the function along the lines of our SHA-family implementations +// was easy, and it removes one file with RSA copyright from our system. +// +// The only data copied for this implementation is the round constant values +// which were copied from the RFC. +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. 
+// + +const SYMCRYPT_HASH SymCryptMd5Algorithm_default = { + &SymCryptMd5Init, + &SymCryptMd5Append, + &SymCryptMd5Result, + &SymCryptMd5AppendBlocks, + &SymCryptMd5StateCopy, + sizeof( SYMCRYPT_MD5_STATE ), + SYMCRYPT_MD5_RESULT_SIZE, + SYMCRYPT_MD5_INPUT_BLOCK_SIZE, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_MD5_STATE, chain ), + SYMCRYPT_FIELD_SIZE( SYMCRYPT_MD5_STATE, chain ), +}; + +const PCSYMCRYPT_HASH SymCryptMd5Algorithm = &SymCryptMd5Algorithm_default; + +// +// The round constants used by MD5 +// +// These are called T[i] in RFC1321 although T[i] uses the range [1..64] and we use [0..63] +// This array should be optimized away by the compiler as all values are inlined. +// +static const UINT32 md5Const[64] = { + 0xd76aa478UL, + 0xe8c7b756UL, + 0x242070dbUL, + 0xc1bdceeeUL, + 0xf57c0fafUL, + 0x4787c62aUL, + 0xa8304613UL, + 0xfd469501UL, + 0x698098d8UL, + 0x8b44f7afUL, + 0xffff5bb1UL, + 0x895cd7beUL, + 0x6b901122UL, + 0xfd987193UL, + 0xa679438eUL, + 0x49b40821UL, + 0xf61e2562UL, + 0xc040b340UL, + 0x265e5a51UL, + 0xe9b6c7aaUL, + 0xd62f105dUL, + 0x02441453UL, + 0xd8a1e681UL, + 0xe7d3fbc8UL, + 0x21e1cde6UL, + 0xc33707d6UL, + 0xf4d50d87UL, + 0x455a14edUL, + 0xa9e3e905UL, + 0xfcefa3f8UL, + 0x676f02d9UL, + 0x8d2a4c8aUL, + 0xfffa3942UL, + 0x8771f681UL, + 0x6d9d6122UL, + 0xfde5380cUL, + 0xa4beea44UL, + 0x4bdecfa9UL, + 0xf6bb4b60UL, + 0xbebfbc70UL, + 0x289b7ec6UL, + 0xeaa127faUL, + 0xd4ef3085UL, + 0x04881d05UL, + 0xd9d4d039UL, + 0xe6db99e5UL, + 0x1fa27cf8UL, + 0xc4ac5665UL, + 0xf4292244UL, + 0x432aff97UL, + 0xab9423a7UL, + 0xfc93a039UL, + 0x655b59c3UL, + 0x8f0ccc92UL, + 0xffeff47dUL, + 0x85845dd1UL, + 0x6fa87e4fUL, + 0xfe2ce6e0UL, + 0xa3014314UL, + 0x4e0811a1UL, + 0xf7537e82UL, + 0xbd3af235UL, + 0x2ad7d2bbUL, + 0xeb86d391UL, +}; + +// +// Round rotation amounts. This array is optimized away by the compiler +// as we inline all our rotations. 
+// +static const int md5Rotate[64] = { + 7, 12, 17, 22, + 7, 12, 17, 22, + 7, 12, 17, 22, + 7, 12, 17, 22, + + 5, 9, 14, 20, + 5, 9, 14, 20, + 5, 9, 14, 20, + 5, 9, 14, 20, + + 4, 11, 16, 23, + 4, 11, 16, 23, + 4, 11, 16, 23, + 4, 11, 16, 23, + + 6, 10, 15, 21, + 6, 10, 15, 21, + 6, 10, 15, 21, + 6, 10, 15, 21, +}; + +// +// Message word index table. This array is optimized away by the compiler +// as we inline all our accesses. +// +static const int md5MsgIndex[64] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, + 5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, + 0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, +}; + +// +// Initial state +// +static const UINT32 md5InitialState[4] = { + 0x67452301UL, + 0xefcdab89UL, + 0x98badcfeUL, + 0x10325476UL, +}; + +// +// SymCryptMd5 +// +#define ALG MD5 +#define Alg Md5 +#include "hash_pattern.c" +#undef ALG +#undef Alg + + + + +// +// SymCryptMd5Init +// +VOID +SYMCRYPT_CALL +SymCryptMd5Init( _Out_ PSYMCRYPT_MD5_STATE pState ) +{ + SYMCRYPT_SET_MAGIC( pState ); + + pState->dataLengthL = 0; + pState->dataLengthH = 0; + pState->bytesInBuffer = 0; + + memcpy( &pState->chain.H[0], &md5InitialState[0], sizeof( md5InitialState ) ); + + // + // There is no need to initialize the buffer part of the state as that will be + // filled before it is used. 
+ // +} + + +// +// SymCryptMd5Append +// +VOID +SYMCRYPT_CALL +SymCryptMd5Append( + _Inout_ PSYMCRYPT_MD5_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SymCryptHashAppendInternal( SymCryptMd5Algorithm, (PSYMCRYPT_COMMON_HASH_STATE)pState, pbData, cbData ); +} + +// +// SymCryptMd5Result +// +VOID +SYMCRYPT_CALL +SymCryptMd5Result( + _Inout_ PSYMCRYPT_MD5_STATE pState, + _Out_writes_( SYMCRYPT_MD5_RESULT_SIZE ) PBYTE pbResult ) +{ + SymCryptHashCommonPaddingMd4Style( SymCryptMd5Algorithm, (PSYMCRYPT_COMMON_HASH_STATE) pState ); + + // + // Write the output in the correct byte order + // + SymCryptUint32ToLsbFirst( &pState->chain.H[0], pbResult, 4 ); + + // + // Wipe & re-initialize + // We have to wipe the whole state because the Init call + // might be optimized away by a smart compiler. + // And we need to wipe old data. + // + SymCryptWipeKnownSize( pState, sizeof( *pState ) ); + SymCryptMd5Init( pState ); +} + + +// +// For documentation on these function see rfc-1321 +// +//#define F( x, y, z ) (((x) & (y)) ^ ((~(x)) & (z))) +#define F( x, y, z ) ((((z) ^ (y)) & (x)) ^ (z)) +#define G( x, y, z ) F( (z), (x), (y) ) +#define H( x, y, z ) ((x) ^ (y) ^ (z) ) +#define I( x, y, z ) ((y) ^ ((x) | ~(z))) + +// +// The values a-d are stored in an array called ad. +// We have unrolled the code completely. This makes both the indices into +// the ad array constant, and it makes the message addressing constant. +// +// We copy the message into our own buffer to obey the read-once rule. +// Memory is sometimes aliased so that multiple threads or processes can access +// the same memory at the same time. With MD5 there is a danger that some other +// process could modify the memory while the computation is ongoing and introduce +// changes in the computation not envisioned by the designers or cryptanalysts. 
+// At this level in the library we cannot guarantee that this is not the case, +// and we can't trust the higher layers to respect a don't-change-it-while-computing-md5 +// restriction. (In practice, such restrictions are lost through the many +// layers in the stack.) +// +// +// Initial round macro +// +// r is the round number +// ad[(r+0)%4] = a; +// ad[(r+1)%4] = d; +// ad[(r+2)%4] = c; +// ad[(r+3)%4] = b; +// +// When r increments the register re-naming is automatically correct. +// +#define CROUND( r, Func ) { \ + ad[r%4] = ad[(r+3)%4] + ROL32( ad[r%4] + Func(ad[(r+3)%4], ad[(r+2)%4], ad[(r+1)%4]) + Wt + md5Const[r], md5Rotate[r] ); \ +} + +#define IROUND( r, Func ) { \ + Wt = SYMCRYPT_LOAD_LSBFIRST32( &pbData[ 4*md5MsgIndex[r] ] ); \ + W[r] = Wt; \ + CROUND( r, Func ); \ +} + +// +// Subsequent rounds. +// This is the same as the IROUND except that it uses the copied message. +// +#define FROUND( r, Func ) { \ + Wt = W[md5MsgIndex[r]];\ + CROUND( r, Func ); \ +} + +VOID +SYMCRYPT_CALL +SymCryptMd5AppendBlocks( + _Inout_ SYMCRYPT_MD5_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ) +{ + + UINT32 W[16]; + UINT32 ad[4]; + UINT32 Wt; + + ad[0] = pChain->H[0]; + ad[1] = pChain->H[3]; + ad[2] = pChain->H[2]; + ad[3] = pChain->H[1]; + + while( cbData >= 64 ) + { + // + // initial rounds 1 to 16 + // + + IROUND( 0, F ); + IROUND( 1, F ); + IROUND( 2, F ); + IROUND( 3, F ); + IROUND( 4, F ); + IROUND( 5, F ); + IROUND( 6, F ); + IROUND( 7, F ); + IROUND( 8, F ); + IROUND( 9, F ); + IROUND( 10, F ); + IROUND( 11, F ); + IROUND( 12, F ); + IROUND( 13, F ); + IROUND( 14, F ); + IROUND( 15, F ); + + FROUND( 16, G ); + FROUND( 17, G ); + FROUND( 18, G ); + FROUND( 19, G ); + FROUND( 20, G ); + FROUND( 21, G ); + FROUND( 22, G ); + FROUND( 23, G ); + FROUND( 24, G ); + FROUND( 25, G ); + FROUND( 26, G ); + FROUND( 27, G ); + FROUND( 28, G ); + FROUND( 29, G ); + FROUND( 30, G ); + FROUND( 31, G ); + + 
FROUND( 32, H ); + FROUND( 33, H ); + FROUND( 34, H ); + FROUND( 35, H ); + FROUND( 36, H ); + FROUND( 37, H ); + FROUND( 38, H ); + FROUND( 39, H ); + FROUND( 40, H ); + FROUND( 41, H ); + FROUND( 42, H ); + FROUND( 43, H ); + FROUND( 44, H ); + FROUND( 45, H ); + FROUND( 46, H ); + FROUND( 47, H ); + + FROUND( 48, I ); + FROUND( 49, I ); + FROUND( 50, I ); + FROUND( 51, I ); + FROUND( 52, I ); + FROUND( 53, I ); + FROUND( 54, I ); + FROUND( 55, I ); + FROUND( 56, I ); + FROUND( 57, I ); + FROUND( 58, I ); + FROUND( 59, I ); + FROUND( 60, I ); + FROUND( 61, I ); + FROUND( 62, I ); + FROUND( 63, I ); + + pChain->H[0] = ad[0] = ad[0] + pChain->H[0]; + pChain->H[3] = ad[1] = ad[1] + pChain->H[3]; + pChain->H[2] = ad[2] = ad[2] + pChain->H[2]; + pChain->H[1] = ad[3] = ad[3] + pChain->H[1]; + + pbData += 64; + cbData -= 64; + } + + *pcbRemaining = cbData; + + // + // Wipe the variables; + // + SymCryptWipeKnownSize( ad, sizeof( ad ) ); + SymCryptWipeKnownSize( W, sizeof( W ) ); + SymCryptWipeKnownSize( &Wt, sizeof( Wt ) ); +} + +VOID +SYMCRYPT_CALL +SymCryptMd5StateExport( + _In_ PCSYMCRYPT_MD5_STATE pState, + _Out_writes_bytes_( SYMCRYPT_MD5_STATE_EXPORT_SIZE ) PBYTE pbBlob ) +{ + SYMCRYPT_MD5_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. + C_ASSERT( sizeof( blob ) == SYMCRYPT_MD5_STATE_EXPORT_SIZE ); + + SYMCRYPT_CHECK_MAGIC( pState ); + + SymCryptWipeKnownSize( &blob, sizeof( blob ) ); // wipe to avoid any data leakage + + blob.header.magic = SYMCRYPT_BLOB_MAGIC; + blob.header.size = SYMCRYPT_MD5_STATE_EXPORT_SIZE; + blob.header.type = SymCryptBlobTypeMd5State; + + // + // Copy the relevant data. Buffer will be 0-padded. 
+ // + + SymCryptUint32ToLsbFirst( &pState->chain.H[0], &blob.chain[0], 4 ); + blob.dataLength = pState->dataLengthL; + memcpy( &blob.buffer[0], &pState->buffer[0], blob.dataLength & 0x3f ); + + SYMCRYPT_ASSERT( (PCBYTE) &blob + sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ) == (PCBYTE) &blob.trailer ); + SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), &blob.trailer.checksum[0] ); + + memcpy( pbBlob, &blob, sizeof( blob ) ); + +//cleanup: + SymCryptWipeKnownSize( &blob, sizeof( blob ) ); + return; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMd5StateImport( + _Out_ PSYMCRYPT_MD5_STATE pState, + _In_reads_bytes_( SYMCRYPT_MD5_STATE_EXPORT_SIZE) PCBYTE pbBlob ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_MD5_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. + BYTE checksum[8]; + + C_ASSERT( sizeof( blob ) == SYMCRYPT_MD5_STATE_EXPORT_SIZE ); + memcpy( &blob, pbBlob, sizeof( blob ) ); + + if( blob.header.magic != SYMCRYPT_BLOB_MAGIC || + blob.header.size != SYMCRYPT_MD5_STATE_EXPORT_SIZE || + blob.header.type != SymCryptBlobTypeMd5State ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), checksum ); + if( memcmp( checksum, &blob.trailer.checksum[0], 8 ) != 0 ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + SymCryptLsbFirstToUint32( &blob.chain[0], &pState->chain.H[0], 4 ); + pState->dataLengthL = blob.dataLength; + pState->dataLengthH = 0; + pState->bytesInBuffer = blob.dataLength & 0x3f; + memcpy( &pState->buffer[0], &blob.buffer[0], pState->bytesInBuffer ); + + SYMCRYPT_SET_MAGIC( pState ); + +cleanup: + SymCryptWipeKnownSize( &blob, sizeof(blob) ); + return scError; +} + + +// +// Simple test vector for FIPS module testing +// + +static const BYTE md5KATAnswer[ 16 ] = { + 0x90, 0x01, 0x50, 0x98, 0x3c, 0xd2, 0x4f, 0xb0, + 0xd6, 
0x96, 0x3f, 0x7d, 0x28, 0xe1, 0x7f, 0x72, +} ; + +VOID +SYMCRYPT_CALL +SymCryptMd5Selftest(void) +{ + BYTE result[SYMCRYPT_MD5_RESULT_SIZE]; + + SymCryptMd5( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result ); + + SymCryptInjectError( result, sizeof( result ) ); + + if( memcmp( result, md5KATAnswer, sizeof( result ) ) != 0 ) { + SymCryptFatal( 'MD5t' ); + } +} diff --git a/libs/symcrypt/lib/mldsa.c b/libs/symcrypt/lib/mldsa.c new file mode 100644 index 00000000000..2dca3270368 --- /dev/null +++ b/libs/symcrypt/lib/mldsa.c @@ -0,0 +1,1096 @@ +// +// mldsa.c ML-DSA related functionality +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +_Use_decl_annotations_ +PSYMCRYPT_MLDSAKEY +SYMCRYPT_CALL +SymCryptMlDsakeyAllocate( + SYMCRYPT_MLDSA_PARAMS params ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PSYMCRYPT_MLDSAKEY pkMlDsakey = NULL; + PSYMCRYPT_MLDSA_INTERNAL_PARAMS pInternalParams = NULL; + PBYTE pbKey = NULL; + UINT32 cbKey = 0; + + scError = SymCryptMlDsaGetInternalParamsFromParams( params, &pInternalParams ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + cbKey = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_KEY( pInternalParams->nRows, pInternalParams->nCols ); + pbKey = SymCryptCallbackAlloc( cbKey ); + if( pbKey == NULL ) + { + goto cleanup; + } + + pkMlDsakey = SymCryptMlDsakeyInitialize( pInternalParams, pbKey, cbKey ); + if( pkMlDsakey == NULL ) + { + goto cleanup; + } + + // On success, memory is owned by pkMlDsakey + pbKey = NULL; + +cleanup: + if( pbKey != NULL ) + { + SymCryptCallbackFree( pbKey ); + } + + return pkMlDsakey; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsakeyFree( + PSYMCRYPT_MLDSAKEY pkMlDsakey ) +{ + SYMCRYPT_CHECK_MAGIC( pkMlDsakey ); + + SymCryptWipe( pkMlDsakey, pkMlDsakey->cbTotalSize ); + SymCryptCallbackFree( pkMlDsakey ); +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaKeyGenerateEx( + PSYMCRYPT_MLDSAKEY pkMlDsakey, 
+ PCBYTE pbRootSeed, + SIZE_T cbRootSeed, + UINT32 flags ) +{ + UNREFERENCED_PARAMETER( flags ); + + SYMCRYPT_ASSERT( cbRootSeed == SYMCRYPT_MLDSA_ROOT_SEED_SIZE ); + + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams = pkMlDsakey->pParams; + PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemps = NULL; + + BYTE privateVectorSeed[SYMCRYPT_MLDSA_PRIVATE_VECTOR_SEED_SIZE]; + + pTemps = SymCryptMlDsaTemporariesAllocateAndInitialize( + pParams, + 1, // row vectors + 0, // column vectors + 1, // poly elements + pParams->cbEncodedPublicKey ); // scratch space + if( pTemps == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + memcpy( pkMlDsakey->rootSeed, pbRootSeed, cbRootSeed ); + + { + PSYMCRYPT_SHAKE256_STATE pShakeState = &(pTemps->shake256State); + SymCryptShake256Init( pShakeState ); + SymCryptShake256Append( pShakeState, pkMlDsakey->rootSeed, cbRootSeed ); + SymCryptShake256Append( pShakeState, (PCBYTE) &pParams->nRows, sizeof(BYTE) ); + SymCryptShake256Append( pShakeState, (PCBYTE) &pParams->nCols, sizeof(BYTE) ); + + SymCryptShake256Extract( pShakeState, pkMlDsakey->publicSeed, sizeof(pkMlDsakey->publicSeed), FALSE); + SymCryptShake256Extract( pShakeState, privateVectorSeed, sizeof(privateVectorSeed), FALSE ); + SymCryptShake256Extract( pShakeState, pkMlDsakey->privateSigningSeed, sizeof(pkMlDsakey->privateSigningSeed), FALSE); // Wiped when pTemps is freed + } + + SymCryptMlDsaExpandA( pkMlDsakey->publicSeed, sizeof(pkMlDsakey->publicSeed), pkMlDsakey->pmA ); + + SymCryptMlDsaExpandS( + pkMlDsakey->pParams, + privateVectorSeed, + sizeof(privateVectorSeed), + pkMlDsakey->pvs1, + pkMlDsakey->pvs2 ); + + // Convert s1 and s2 to NTT form + SymCryptMlDsaVectorNTT( pkMlDsakey->pvs1 ); + SymCryptMlDsaVectorNTT( pkMlDsakey->pvs2 ); + + SymCryptMlDsakeyComputeT( + pkMlDsakey->pmA, + pkMlDsakey->pvs1, + pkMlDsakey->pvs2, + pkMlDsakey->pvt0, + pkMlDsakey->pvt1, + pTemps->pvRowVectors[0], + 
pTemps->pePolyElements[0] ); + + // Convert t0 and t1 to NTT form + SymCryptMlDsaVectorNTT( pkMlDsakey->pvt0 ); + SymCryptMlDsaVectorNTT( pkMlDsakey->pvt1 ); + + scError = SymCryptMlDsaPkEncode( pkMlDsakey, pTemps->pbScratch, pParams->cbEncodedPublicKey ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + SymCryptShake256( + pTemps->pbScratch, + pParams->cbEncodedPublicKey, + pkMlDsakey->publicKeyHash, + sizeof(pkMlDsakey->publicKeyHash) ); + + pkMlDsakey->hasRootSeed = TRUE; + pkMlDsakey->hasPrivateKey = TRUE; + +cleanup: + if( pTemps != NULL ) + { + SymCryptMlDsaTemporariesFree( pTemps ); + } + + SymCryptWipeKnownSize( privateVectorSeed, sizeof(privateVectorSeed) ); + + return scError; +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsakeyGenerate( + PSYMCRYPT_MLDSAKEY pkMlDsakey, + UINT32 flags) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + BYTE random[SYMCRYPT_MLDSA_ROOT_SEED_SIZE]; + PBYTE pbPctSignature = NULL; + SIZE_T cbPctSignature = 0; + + // Ensure only allowed flags are specified + UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_NO_FIPS; + + if ( ( flags & ~allowedFlags ) != 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + scError = SymCryptCallbackRandom( random, sizeof(random) ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + scError = SymCryptMlDsakeySetValue( random, sizeof(random), SYMCRYPT_MLDSAKEY_FORMAT_PRIVATE_SEED, flags, pkMlDsakey ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // SymCryptMlDsakeySetValue ensures the self-test is run before + // first operational use of MlDsa + + if( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) + { + // PCT on key generation, sign/verify the empty message with the generated key + + cbPctSignature = pkMlDsakey->pParams->cbEncodedSignature; + + pbPctSignature = SymCryptCallbackAlloc( cbPctSignature ); + if( pbPctSignature == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + scError = 
SymCryptMlDsaSign( pkMlDsakey, + NULL, 0, + NULL, 0, + 0, + pbPctSignature, cbPctSignature ); + if( scError != SYMCRYPT_NO_ERROR ) + { + scError = SYMCRYPT_FIPS_FAILURE; + goto cleanup; + } + + scError = SymCryptMlDsaVerify( pkMlDsakey, + NULL, 0, + NULL, 0, + pbPctSignature, cbPctSignature, + 0 ); + if( scError != SYMCRYPT_NO_ERROR ) + { + scError = SYMCRYPT_FIPS_FAILURE; + goto cleanup; + } + + // could track having run the PCT with a flag in pkMlDsakey->fAlgorithmInfo, + // but currently no need to do that given we don't ever defer the PCT + } + +cleanup: + if( pbPctSignature != NULL ) + { + // Wiping is not required for security, but has low relative cost + // and better to be on the safe side for FIPS + SymCryptWipe( pbPctSignature, cbPctSignature ); + SymCryptCallbackFree( pbPctSignature ); + } + + SymCryptWipeKnownSize( random, sizeof(random) ); + + return scError; +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsakeySetValue( + PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_MLDSAKEY_FORMAT mlDsakeyFormat, + UINT32 flags, + PSYMCRYPT_MLDSAKEY pkMlDsakey ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + // Ensure only allowed flags are specified + UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_NO_FIPS; + + if ( ( flags & ~allowedFlags ) != 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) + { + // Ensure ML-DSA algorithm selftest is run before first use of ML-DSA algorithms; + // notably _before_ first full KeyGen + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptMlDsaSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_MLDSA); + } + + switch( mlDsakeyFormat ) + { + case SYMCRYPT_MLDSAKEY_FORMAT_PRIVATE_SEED: + if( cbSrc != SYMCRYPT_MLDSA_ROOT_SEED_SIZE ) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + scError = SymCryptMlDsaKeyGenerateEx( pkMlDsakey, pbSrc, cbSrc, flags ); + break; + case SYMCRYPT_MLDSAKEY_FORMAT_PRIVATE_KEY: + scError = SymCryptMlDsaSkDecode( pbSrc, cbSrc, 
flags, pkMlDsakey ); + break; + case SYMCRYPT_MLDSAKEY_FORMAT_PUBLIC_KEY: + scError = SymCryptMlDsaPkDecode( pbSrc, cbSrc, flags, pkMlDsakey ); + break; + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + break; + } + +cleanup: + return scError; +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsakeyGetValue( + PCSYMCRYPT_MLDSAKEY pkMlDsakey, + PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_MLDSAKEY_FORMAT mlDsakeyFormat, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + if( flags != 0 ) // No flags currently supported + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + switch( mlDsakeyFormat ) + { + case SYMCRYPT_MLDSAKEY_FORMAT_PRIVATE_KEY: + scError = SymCryptMlDsaSkEncode( + pkMlDsakey, + pbDst, + cbDst ); + break; + case SYMCRYPT_MLDSAKEY_FORMAT_PUBLIC_KEY: + scError = SymCryptMlDsaPkEncode( + pkMlDsakey, + pbDst, + cbDst ); + break; + case SYMCRYPT_MLDSAKEY_FORMAT_PRIVATE_SEED: + if( cbDst < SYMCRYPT_MLDSA_ROOT_SEED_SIZE ) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + + if( !pkMlDsakey->hasRootSeed ) + { + scError = SYMCRYPT_INCOMPATIBLE_FORMAT; + goto cleanup; + } + + memcpy( pbDst, pkMlDsakey->rootSeed, SYMCRYPT_MLDSA_ROOT_SEED_SIZE ); + + break; + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + break; + } + +cleanup: + return scError; +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSignEx( + PCSYMCRYPT_MLDSAKEY pkMlDsakey, + PCBYTE pbInput, + SIZE_T cbInput, + PCBYTE pbContext, + SIZE_T cbContext, + PCBYTE pbHashOid, + SIZE_T cbHashOid, + PCBYTE pbRandom, + SIZE_T cbRandom, + UINT32 flags, + PBYTE pbSignature, + SIZE_T cbSignature ) +{ + SYMCRYPT_ASSERT( pkMlDsakey->hasPrivateKey == TRUE ); + SYMCRYPT_ASSERT( cbContext <= SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH ); + SYMCRYPT_ASSERT( cbRandom == SYMCRYPT_MLDSA_SIGNING_RANDOM_SIZE ); + SYMCRYPT_ASSERT( pbHashOid != NULL || cbHashOid == 0 ); + SYMCRYPT_ASSERT( pbContext != NULL || cbContext == 0 ); + SYMCRYPT_ASSERT( 
cbSignature == pkMlDsakey->pParams->cbEncodedSignature ); + SYMCRYPT_ASSERT( (flags & ~SYMCRYPT_FLAG_MLDSA_EXTERNALMU) == 0 ); + SYMCRYPT_ASSERT( ((flags & SYMCRYPT_FLAG_MLDSA_EXTERNALMU) == 0) || (pbContext == NULL && pbHashOid == NULL) ); + + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams = pkMlDsakey->pParams; + PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemps = NULL; + + const UINT32 beta = (UINT32) pParams->nChallengeNonZeroCoeffs * pParams->privateKeyRange; + + BOOL bExternalMu = (flags & SYMCRYPT_FLAG_MLDSA_EXTERNALMU) != 0; + UINT8 modeId = (pbHashOid == NULL) ? 0 : 1; // 0 for ML-DSA, 1 for HashML-DSA + UINT8 cbContextByte = (UINT8) cbContext; + BYTE messageRepresentative[SYMCRYPT_SHAKE256_RESULT_SIZE]; + BYTE privateRandom[SYMCRYPT_SHAKE256_RESULT_SIZE]; + BYTE commitmentHash[64]; // Largest possible size for commitment hash + + const UINT32 cbw1Encoded = pParams->nRows * pParams->w1EncodeCoefficientBitLength * + ( SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8 ); + + pTemps = SymCryptMlDsaTemporariesAllocateAndInitialize( + pParams, + 2, // row vectors - W, W1, cs2, ct0, r0, hint (not all needed simultaneously) + 3, // column vectors - mask, cs1, response + 1, // poly element - challenge + cbw1Encoded ); // scratch space - w1 encoded + if( pTemps == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + PSYMCRYPT_SHAKE256_STATE pShakeState = &(pTemps->shake256State); + SymCryptShake256Init( pShakeState ); + + if ( bExternalMu ) + { + // Caller passes the externally-computed message representative mu + SYMCRYPT_ASSERT( cbInput == SYMCRYPT_SHAKE256_RESULT_SIZE ); + memcpy( messageRepresentative, pbInput, SYMCRYPT_SHAKE256_RESULT_SIZE ); + } + else + { + // Line 6: calculate message representative mu + // = SHAKE256( public key hash || modeId || cbContextByte || context || OID? 
|| message/hash, 64 ) + // The OID is only included in the HashML-DSA mode + SymCryptShake256Append( pShakeState, pkMlDsakey->publicKeyHash, sizeof(pkMlDsakey->publicKeyHash) ); + SymCryptShake256Append( pShakeState, &modeId, sizeof( modeId ) ); + SymCryptShake256Append( pShakeState, &cbContextByte, sizeof( cbContextByte ) ); + + // These appends are no-ops if the length is zero + SymCryptShake256Append( pShakeState, pbContext, cbContext ); + SymCryptShake256Append( pShakeState, pbHashOid, cbHashOid ); + + SymCryptShake256Append( pShakeState, pbInput, cbInput ); + SymCryptShake256Result( pShakeState, messageRepresentative ); + } + + // Line 7: Calculate private random seed rho prime prime + // = SHAKE256( private signing seed K || pbRandom || message representative mu, 64 ) + SymCryptShake256Append( pShakeState, pkMlDsakey->privateSigningSeed, sizeof(pkMlDsakey->privateSigningSeed) ); + SymCryptShake256Append( pShakeState, pbRandom, cbRandom ); + SymCryptShake256Append( pShakeState, messageRepresentative, sizeof(messageRepresentative) ); + SymCryptShake256Result( pShakeState, privateRandom ); + + PSYMCRYPT_MLDSA_VECTOR pvW = pTemps->pvRowVectors[0]; + PSYMCRYPT_MLDSA_VECTOR pvHint = NULL; + + PSYMCRYPT_MLDSA_VECTOR pvMask = pTemps->pvColVectors[0]; + PSYMCRYPT_MLDSA_VECTOR pvResponse = pTemps->pvColVectors[1]; + PSYMCRYPT_MLDSA_VECTOR pvcs1 = pTemps->pvColVectors[2]; + + PBYTE pbW1Encoded = pTemps->pbScratch; + + UINT16 k = 0; + while( TRUE ) + { + SymCryptMlDsaExpandMask( + pParams, + pShakeState, + privateRandom, + sizeof(privateRandom), + k, + pvMask ); + + // Increment k early so we can continue to the next loop iteration when validity checks fail + // It's okay to leak how many iterations this loop takes because the SHAKE inputs and + // outputs are still unpredictable; this does not leak information about the private key + k += (UINT16) pParams->nCols; + + SymCryptMlDsaMatrixVectorMontMul( pkMlDsakey->pmA, pvMask, pvW, pTemps->pePolyElements[0] ); + + 
for(UINT8 i = 0; i < pvW->nElems; ++i) + { + SymCryptMlDsaPolyElementMulR( SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT(i, pvW) ); + } + + SymCryptMlDsaVectorINTT( pvW ); + + { + // Scope for pvW1 + PSYMCRYPT_MLDSA_VECTOR pvW1 = pTemps->pvRowVectors[1]; + SymCryptMlDsaVectorHighBits( pParams, pvW, pvW1 ); + SymCryptMlDsaVectorEncode( pvW1, pParams->w1EncodeCoefficientBitLength, 0, pbW1Encoded ); + } + + // Calculate commitment hash + SymCryptShake256Append( pShakeState, messageRepresentative, sizeof(messageRepresentative) ); + SymCryptShake256Append( pShakeState, pbW1Encoded, cbw1Encoded ); + SymCryptShake256Extract( pShakeState, commitmentHash, pParams->cbCommitmentHash, TRUE ); + + // Calculate challenge + // Reusing poly element 0 for challenge (previously temp space for multiplication) + PSYMCRYPT_MLDSA_POLYELEMENT peC = pTemps->pePolyElements[0]; + SymCryptMlDsaSampleInBall( pParams, commitmentHash, pParams->cbCommitmentHash, peC ); + + SymCryptMlDsaPolyElementNTT( peC ); + SymCryptMlDsaPolyElementMulR( peC ); + + { + // Scope for cs2 - reusing row vector 1, previously W1 + PSYMCRYPT_MLDSA_VECTOR pvcs2 = pTemps->pvRowVectors[1]; + SymCryptMlDsaVectorPolyElementMontMul( pkMlDsakey->pvs1, peC, pvcs1 ); + SymCryptMlDsaVectorPolyElementMontMul( pkMlDsakey->pvs2, peC, pvcs2 ); + + SymCryptMlDsaVectorINTT( pvcs1 ); + SymCryptMlDsaVectorINTT( pvcs2 ); + + SymCryptMlDsaVectorINTT( pvMask ); + SymCryptMlDsaVectorAdd( pvMask, pvcs1, pvResponse ); + + // (w - cs2) is an input to both LowBits (for r0) and MakeHint + SymCryptMlDsaVectorSub( pvW, pvcs2, pvW ); + } + + { + // Scope for r0 - reusing row vector 1, previously cs2 + PSYMCRYPT_MLDSA_VECTOR pvr0 = pTemps->pvRowVectors[1]; + SymCryptMlDsaVectorLowBits( pParams, pvW, pvr0 ); + + UINT32 zInfinityNorm = SymCryptMlDsaVectorInfinityNorm( pvResponse ); + UINT32 r0InfinityNorm = SymCryptMlDsaVectorInfinityNorm( pvr0 ); + + if( (zInfinityNorm >= (1 << pParams->maskCoefficientRangeLog2) - beta) || + (r0InfinityNorm >= 
pParams->commitmentRoundingRange - beta) ) + { + continue; + } + } + + { + // Scope for ct0 - reusing row vector 1, previously r0 + PSYMCRYPT_MLDSA_VECTOR pvct0 = pTemps->pvRowVectors[1]; + + SymCryptMlDsaVectorPolyElementMontMul( pkMlDsakey->pvt0, peC, pvct0 ); + SymCryptMlDsaVectorINTT( pvct0 ); + + UINT32 ct0InfinityNorm = SymCryptMlDsaVectorInfinityNorm( pvct0 ); + if( ct0InfinityNorm >= pParams->commitmentRoundingRange ) + { + continue; + } + + // MakeHint vectors + // w - cs2 = pvW + // w - cs2 + ct0 = pvct0 + pvW + SymCryptMlDsaVectorAdd( pvct0, pvW, pvct0 ); + + // Write hint in-place over ct0 + UINT32 nHintBitsSet = 0; + SymCryptMlDsaMakeHint( pParams, pvW, pvct0, pvct0, &nHintBitsSet ); + + if( nHintBitsSet > pParams->nHintNonZeroCoeffs ) + { + continue; + } + } + + // Row vector 1, previously ct0, now contains the hint + pvHint = pTemps->pvRowVectors[1]; + + break; + } + + SYMCRYPT_ASSERT( pvHint != NULL ); + + SymCryptMlDsaSigEncode( + pParams, + commitmentHash, + pParams->cbCommitmentHash, + pvResponse, + pvHint, + pbSignature, + cbSignature ); + +cleanup: + if( pTemps != NULL ) + { + SymCryptMlDsaTemporariesFree( pTemps ); + } + + return scError; +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSign( + PCSYMCRYPT_MLDSAKEY pkMlDsakey, + PCBYTE pbMessage, + SIZE_T cbMessage, + PCBYTE pbContext, + SIZE_T cbContext, + UINT32 flags, + PBYTE pbSignature, + SIZE_T cbSignature ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + if( (flags != 0) || // No flags currently supported + (cbContext > SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH) || + (pkMlDsakey->hasPrivateKey == FALSE) || + (cbSignature != pkMlDsakey->pParams->cbEncodedSignature) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + BYTE random[SYMCRYPT_MLDSA_SIGNING_RANDOM_SIZE]; + scError = SymCryptCallbackRandom( random, sizeof(random) ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + scError = SymCryptMlDsaSignEx( + pkMlDsakey, + pbMessage, + 
cbMessage, + pbContext, + cbContext, + NULL, // pbHashOid + 0, // cbHashOid + random, + sizeof(random), + flags, + pbSignature, + cbSignature ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + SymCryptWipeKnownSize( random, sizeof(random) ); + + return scError; +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptExternalMuMlDsaSign( + PCSYMCRYPT_MLDSAKEY pkMlDsakey, + PCBYTE pbMu, + SIZE_T cbMu, + UINT32 flags, + PBYTE pbSignature, + SIZE_T cbSignature ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + if( (flags != 0) || // No flags currently supported + (pkMlDsakey->hasPrivateKey == FALSE) || + (cbMu != SYMCRYPT_SHAKE256_RESULT_SIZE) || + (cbSignature != pkMlDsakey->pParams->cbEncodedSignature) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + BYTE random[SYMCRYPT_MLDSA_SIGNING_RANDOM_SIZE]; + scError = SymCryptCallbackRandom( random, sizeof(random) ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + scError = SymCryptMlDsaSignEx( + pkMlDsakey, + pbMu, + cbMu, + NULL, // pbContext + 0, // cbContext + NULL, // pbHashOid + 0, // cbHashOid + random, + sizeof(random), + SYMCRYPT_FLAG_MLDSA_EXTERNALMU, + pbSignature, + cbSignature ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + SymCryptWipeKnownSize( random, sizeof(random) ); + + return scError; +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHashMlDsaSign( + PCSYMCRYPT_MLDSAKEY pkMlDsakey, + SYMCRYPT_PQDSA_HASH_ID hashAlg, + PCBYTE pbHash, + SIZE_T cbHash, + PCBYTE pbContext, + SIZE_T cbContext, + UINT32 flags, + PBYTE pbSignature, + SIZE_T cbSignature ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PCSYMCRYPT_OID pHashOid = NULL; + + if( (flags != 0) || // No flags currently supported + (cbContext > SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH) || + (pkMlDsakey->hasPrivateKey == FALSE) || + (cbSignature != pkMlDsakey->pParams->cbEncodedSignature) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; 
+ goto cleanup; + } + + BYTE random[SYMCRYPT_MLDSA_SIGNING_RANDOM_SIZE]; + scError = SymCryptCallbackRandom( random, sizeof(random) ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + scError = SymCryptHashMlDsaValidateHashAlgAndGetOid( + pkMlDsakey->pParams, + hashAlg, + cbHash, + &pHashOid ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + scError = SymCryptMlDsaSignEx( + pkMlDsakey, + pbHash, + cbHash, + pbContext, + cbContext, + pHashOid->pbOID, + pHashOid->cbOID, + random, + sizeof(random), + flags, + pbSignature, + cbSignature ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + +cleanup: + SymCryptWipeKnownSize( random, sizeof(random) ); + + return scError; +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaVerifyEx( + PCSYMCRYPT_MLDSAKEY pkMlDsakey, + PCBYTE pbInput, + SIZE_T cbInput, + PCBYTE pbContext, + SIZE_T cbContext, + PCBYTE pbHashOid, + SIZE_T cbHashOid, + PCBYTE pbSignature, + SIZE_T cbSignature, + UINT32 flags ) +{ + UNREFERENCED_PARAMETER( flags ); + + SYMCRYPT_ASSERT( cbContext <= SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH ); + SYMCRYPT_ASSERT( pbHashOid != NULL || cbHashOid == 0 ); + SYMCRYPT_ASSERT( pbContext != NULL || cbContext == 0 ); + SYMCRYPT_ASSERT( cbSignature == pkMlDsakey->pParams->cbEncodedSignature ); + SYMCRYPT_ASSERT( (flags & ~SYMCRYPT_FLAG_MLDSA_EXTERNALMU) == 0 ); + SYMCRYPT_ASSERT( ((flags & SYMCRYPT_FLAG_MLDSA_EXTERNALMU) == 0) || (pbContext == NULL && pbHashOid == NULL) ); + + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams = pkMlDsakey->pParams; + PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemps = NULL; + + const UINT32 beta = (UINT32) pParams->nChallengeNonZeroCoeffs * pParams->privateKeyRange; + + BOOL bExternalMu = (flags & SYMCRYPT_FLAG_MLDSA_EXTERNALMU) != 0; + UINT8 modeId = (pbHashOid == NULL) ? 
0 : 1; // 0 for ML-DSA, 1 for HashML-DSA + UINT8 cbContextByte = (UINT8) cbContext; + BYTE messageRepresentative[SYMCRYPT_SHAKE256_RESULT_SIZE]; + BYTE commitmentHash[64]; // Largest possible size for commitment hash + BYTE recalculatedCommitmentHash[64]; + UINT32 responseInfinityNorm; + + const UINT32 cbw1Encoded = pParams->nRows * pParams->w1EncodeCoefficientBitLength * + ( SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8 ); + + pTemps = SymCryptMlDsaTemporariesAllocateAndInitialize( + pParams, + 4, // row vectors - hint, A*NTT(z), T1*(2^d), commitment + 1, // column vectors - response + 2, // poly elements - challenge, temp space for multiplication + cbw1Encoded ); // scratch space - w1 encoded + if( pTemps == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Row vectors + PSYMCRYPT_MLDSA_VECTOR pvHint = pTemps->pvRowVectors[0]; + PSYMCRYPT_MLDSA_VECTOR pvATimesNTTz = pTemps->pvRowVectors[1]; + PSYMCRYPT_MLDSA_VECTOR pvT1Times2D = pTemps->pvRowVectors[2]; + PSYMCRYPT_MLDSA_VECTOR pvCommitment = pTemps->pvRowVectors[3]; + + // Column vectors + PSYMCRYPT_MLDSA_VECTOR pvResponse = pTemps->pvColVectors[0]; + + // Poly elements + PSYMCRYPT_MLDSA_POLYELEMENT peC = pTemps->pePolyElements[0]; + PSYMCRYPT_MLDSA_POLYELEMENT peTmp = pTemps->pePolyElements[1]; + + PBYTE pbw1Encoded = pTemps->pbScratch; + + scError = SymCryptMlDsaSigDecode( + pParams, + pbSignature, + cbSignature, + commitmentHash, + pParams->cbCommitmentHash, + pvResponse, + pvHint ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + responseInfinityNorm = SymCryptMlDsaVectorInfinityNorm( pvResponse ); + + // For the signature to be valid, the response infinity norm must be <= (gamma_1 - beta) + // gamma_1 = (1 << maskCoefficientRangeLog2) + if( responseInfinityNorm >= (1 << pParams->maskCoefficientRangeLog2) - beta ) + { + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + goto cleanup; + } + + SymCryptMlDsaSampleInBall( + pParams, + commitmentHash, + 
pParams->cbCommitmentHash, + peC ); + + SymCryptMlDsaVectorNTT( pvResponse ); + + for(UINT8 i = 0; i < pvResponse->nElems; ++i) + { + SymCryptMlDsaPolyElementMulR( SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT(i, pvResponse) ); + } + + SymCryptMlDsaMatrixVectorMontMul( + pkMlDsakey->pmA, + pvResponse, + pvATimesNTTz, + peTmp ); + + // TODO osgvsowi/55435592 - Consider precomputing t1 * 2^d + const UINT32 pow2DTimesR = 4214781; + for(UINT8 i = 0; i < pkMlDsakey->pvt1->nElems; ++i) + { + PSYMCRYPT_MLDSA_POLYELEMENT peSrc = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pkMlDsakey->pvt1 ); + PSYMCRYPT_MLDSA_POLYELEMENT peDst = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvT1Times2D ); + for(UINT32 j = 0; j < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; ++j) + { + peDst->coeffs[j] = SymCryptMlDsaMontMul( peSrc->coeffs[j], pow2DTimesR ); + } + } + + SymCryptMlDsaPolyElementNTT( peC ); + SymCryptMlDsaPolyElementMulR( peC ); + + SymCryptMlDsaVectorPolyElementMontMul( + pvT1Times2D, + peC, + pvT1Times2D ); + + SymCryptMlDsaVectorSub( + pvATimesNTTz, + pvT1Times2D, + pvCommitment ); + + SymCryptMlDsaVectorINTT( pvCommitment ); + + SymCryptMlDsaUseHint( + pParams, + pvHint, + pvCommitment ); + + SymCryptMlDsaVectorEncode( + pvCommitment, + pParams->w1EncodeCoefficientBitLength, + 0, + pbw1Encoded ); + + PSYMCRYPT_SHAKE256_STATE pShakeState = &(pTemps->shake256State); + SymCryptShake256Init( pShakeState ); + + if ( bExternalMu ) + { + // Caller passes the externally-computed message representative mu + SYMCRYPT_ASSERT( cbInput == SYMCRYPT_SHAKE256_RESULT_SIZE ); + memcpy( messageRepresentative, pbInput, SYMCRYPT_SHAKE256_RESULT_SIZE ); + } + else + { + // Line 7: calculate message representative mu + // = SHAKE256( public key hash || modeId || cbContextByte || context || OID? 
|| message/hash, 64 ) + // The OID is only included in the HashML-DSA mode + SymCryptShake256Append( pShakeState, pkMlDsakey->publicKeyHash, sizeof(pkMlDsakey->publicKeyHash) ); + SymCryptShake256Append( pShakeState, &modeId, sizeof( modeId ) ); + SymCryptShake256Append( pShakeState, &cbContextByte, sizeof( cbContextByte ) ); + + SymCryptShake256Append( pShakeState, pbContext, cbContext ); + SymCryptShake256Append( pShakeState, pbHashOid, cbHashOid ); + + SymCryptShake256Append( pShakeState, pbInput, cbInput ); + SymCryptShake256Result( pShakeState, messageRepresentative ); + } + + SymCryptShake256Append( pShakeState, messageRepresentative, sizeof(messageRepresentative) ); + SymCryptShake256Append( pShakeState, pbw1Encoded, cbw1Encoded ); + SymCryptShake256Extract( pShakeState, recalculatedCommitmentHash, pParams->cbCommitmentHash, TRUE ); + + if( !SymCryptEqual( recalculatedCommitmentHash, commitmentHash, pParams->cbCommitmentHash ) ) + { + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + goto cleanup; + } + +cleanup: + if( pTemps != NULL ) + { + SymCryptMlDsaTemporariesFree( pTemps ); + } + + return scError; +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaVerify( + PCSYMCRYPT_MLDSAKEY pkMlDsakey, + PCBYTE pbMessage, + SIZE_T cbMessage, + PCBYTE pbContext, + SIZE_T cbContext, + PCBYTE pbSignature, + SIZE_T cbSignature, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + if( (flags != 0) || // No flags currently supported + (cbContext > SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH) || + (cbSignature != pkMlDsakey->pParams->cbEncodedSignature) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + scError = SymCryptMlDsaVerifyEx( + pkMlDsakey, + pbMessage, + cbMessage, + pbContext, + cbContext, + NULL, // pbHashOid + 0, // cbHashOid + pbSignature, + cbSignature, + flags ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + return scError; +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR 
+SYMCRYPT_CALL +SymCryptExternalMuMlDsaVerify( + PCSYMCRYPT_MLDSAKEY pkMlDsakey, + PCBYTE pbMu, + SIZE_T cbMu, + PCBYTE pbSignature, + SIZE_T cbSignature, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + if( (flags != 0) || // No flags currently supported + (cbMu != SYMCRYPT_SHAKE256_RESULT_SIZE) || + (cbSignature != pkMlDsakey->pParams->cbEncodedSignature) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + scError = SymCryptMlDsaVerifyEx( + pkMlDsakey, + pbMu, + cbMu, + NULL, // pbContext + 0, // cbContext + NULL, // pbHashOid + 0, // cbHashOid + pbSignature, + cbSignature, + SYMCRYPT_FLAG_MLDSA_EXTERNALMU ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + return scError; +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHashMlDsaVerify( + PCSYMCRYPT_MLDSAKEY pkMlDsakey, + SYMCRYPT_PQDSA_HASH_ID hashAlg, + PCBYTE pbHash, + SIZE_T cbHash, + PCBYTE pbContext, + SIZE_T cbContext, + PCBYTE pbSignature, + SIZE_T cbSignature, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PCSYMCRYPT_OID pHashOid = NULL; + + if( (flags != 0) || // No flags currently supported + (cbContext > SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH) || + (cbSignature != pkMlDsakey->pParams->cbEncodedSignature) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + scError = SymCryptHashMlDsaValidateHashAlgAndGetOid( + pkMlDsakey->pParams, + hashAlg, + cbHash, + &pHashOid ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + scError = SymCryptMlDsaVerifyEx( + pkMlDsakey, + pbHash, + cbHash, + pbContext, + cbContext, + pHashOid->pbOID, + pHashOid->cbOID, + pbSignature, + cbSignature, + flags ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + return scError; +} diff --git a/libs/symcrypt/lib/mldsa_primitives.c b/libs/symcrypt/lib/mldsa_primitives.c new file mode 100644 index 00000000000..a6fe610e147 --- /dev/null +++ b/libs/symcrypt/lib/mldsa_primitives.c 
@@ -0,0 +1,2410 @@ +// +// mldsa_primitives.c ML-DSA low-level primitive implementations +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// Q^-1 mod 2^32 - used in Montgomery reduction +// +#define SYMCRYPT_MLDSA_Q_INV (58728449) + +// +// Inverse NTT fixup times R = (256^-1 << 32) mod Q +// +#define SYMCRYPT_MLDSA_INTT_FIXUP_TIMES_R (16382) + +// +// R^2 mod Q - used for multiplying a factor of R into a polynomial in NTT form via +// Montgomery multiplication +// +#define SYMCRYPT_MLDSA_RSQR (2365951) + +// +// Size of the expanded public seed used in SymCryptMlDsaRejNttPoly +// Defined in FIPS 204 to be 272 bits (256 bit public seed rho || 8 bit index s || 8 bit index r) +// +#define SYMCRYPT_MLDSA_REJNTTPOLY_SEED_SIZE (34) + +// +// Size of the expanded private seed used in SymCryptMlDsaRejBoundedPoly +// Defined in FIPS 204 to be 528 bits (512 bit private vector seed rho' || 16 bit index) +// +#define SYMCRYPT_MLDSA_REJBOUNDEDPOLY_SEED_SIZE (66) + +// +// Number of low-order bits dropped by Power2Round. Defined as d in FIPS 204 +// +#define SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS (13) + +// +// Zeta tables. +// For ML-DSA, zeta = 1753, which is a 512th root of unity modulo Q +// +// In ML-DSA we use powers of zeta to convert to and from NTT form +// and to perform multiplication between polynomials in NTT form +// + +// This table is a lookup for (Zeta^(BitRev(index)) * R) mod Q +// Used in NTT and Inverse NTT +// i.e. 
element 1 is Zeta^(BitRev(1)) * (2^32) mod Q == (1753^128)*(2^32) mod 8380417 == 25847 +// +// MLDSA_ZETA_BITREV_TIMES_R = [ (pow(1753, bitRev(i), 8380417) << 32) % 8380417 for i in range(256) ] +// +const UINT32 MLDSA_ZETA_BITREV_TIMES_R[SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS] = { + 4193792, 25847, 5771523, 7861508, 237124, 7602457, 7504169, 466468, + 1826347, 2353451, 8021166, 6288512, 3119733, 5495562, 3111497, 2680103, + 2725464, 1024112, 7300517, 3585928, 7830929, 7260833, 2619752, 6271868, + 6262231, 4520680, 6980856, 5102745, 1757237, 8360995, 4010497, 280005, + 2706023, 95776, 3077325, 3530437, 6718724, 4788269, 5842901, 3915439, + 4519302, 5336701, 3574422, 5512770, 3539968, 8079950, 2348700, 7841118, + 6681150, 6736599, 3505694, 4558682, 3507263, 6239768, 6779997, 3699596, + 811944, 531354, 954230, 3881043, 3900724, 5823537, 2071892, 5582638, + 4450022, 6851714, 4702672, 5339162, 6927966, 3475950, 2176455, 6795196, + 7122806, 1939314, 4296819, 7380215, 5190273, 5223087, 4747489, 126922, + 3412210, 7396998, 2147896, 2715295, 5412772, 4686924, 7969390, 5903370, + 7709315, 7151892, 8357436, 7072248, 7998430, 1349076, 1852771, 6949987, + 5037034, 264944, 508951, 3097992, 44288, 7280319, 904516, 3958618, + 4656075, 8371839, 1653064, 5130689, 2389356, 8169440, 759969, 7063561, + 189548, 4827145, 3159746, 6529015, 5971092, 8202977, 1315589, 1341330, + 1285669, 6795489, 7567685, 6940675, 5361315, 4499357, 4751448, 3839961, + 2091667, 3407706, 2316500, 3817976, 5037939, 2244091, 5933984, 4817955, + 266997, 2434439, 7144689, 3513181, 4860065, 4621053, 7183191, 5187039, + 900702, 1859098, 909542, 819034, 495491, 6767243, 8337157, 7857917, + 7725090, 5257975, 2031748, 3207046, 4823422, 7855319, 7611795, 4784579, + 342297, 286988, 5942594, 4108315, 3437287, 5038140, 1735879, 203044, + 2842341, 2691481, 5790267, 1265009, 4055324, 1247620, 2486353, 1595974, + 4613401, 1250494, 2635921, 4832145, 5386378, 1869119, 1903435, 7329447, + 7047359, 1237275, 5062207, 6950192, 
7929317, 1312455, 3306115, 6417775, + 7100756, 1917081, 5834105, 7005614, 1500165, 777191, 2235880, 3406031, + 7838005, 5548557, 6709241, 6533464, 5796124, 4656147, 594136, 4603424, + 6366809, 2432395, 2454455, 8215696, 1957272, 3369112, 185531, 7173032, + 5196991, 162844, 1616392, 3014001, 810149, 1652634, 4686184, 6581310, + 5341501, 3523897, 3866901, 269760, 2213111, 7404533, 1717735, 472078, + 7953734, 1723600, 6577327, 1910376, 6712985, 7276084, 8119771, 4546524, + 5441381, 6144432, 7959518, 6094090, 183443, 7403526, 1612842, 4834730, + 7826001, 3919660, 8332111, 7018208, 3937738, 1400424, 7534263, 1976782 +}; + +const UINT32 MLDSA_NEGATIVE_ZETA_BITREV_TIMES_R[SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS] = { + 4186625, 8354570, 2608894, 518909, 8143293, 777960, 876248, 7913949, + 6554070, 6026966, 359251, 2091905, 5260684, 2884855, 5268920, 5700314, + 5654953, 7356305, 1079900, 4794489, 549488, 1119584, 5760665, 2108549, + 2118186, 3859737, 1399561, 3277672, 6623180, 19422, 4369920, 8100412, + 5674394, 8284641, 5303092, 4849980, 1661693, 3592148, 2537516, 4464978, + 3861115, 3043716, 4805995, 2867647, 4840449, 300467, 6031717, 539299, + 1699267, 1643818, 4874723, 3821735, 4873154, 2140649, 1600420, 4680821, + 7568473, 7849063, 7426187, 4499374, 4479693, 2556880, 6308525, 2797779, + 3930395, 1528703, 3677745, 3041255, 1452451, 4904467, 6203962, 1585221, + 1257611, 6441103, 4083598, 1000202, 3190144, 3157330, 3632928, 8253495, + 4968207, 983419, 6232521, 5665122, 2967645, 3693493, 411027, 2477047, + 671102, 1228525, 22981, 1308169, 381987, 7031341, 6527646, 1430430, + 3343383, 8115473, 7871466, 5282425, 8336129, 1100098, 7475901, 4421799, + 3724342, 8578, 6727353, 3249728, 5991061, 210977, 7620448, 1316856, + 8190869, 3553272, 5220671, 1851402, 2409325, 177440, 7064828, 7039087, + 7094748, 1584928, 812732, 1439742, 3019102, 3881060, 3628969, 4540456, + 6288750, 4972711, 6063917, 4562441, 3342478, 6136326, 2446433, 3562462, + 8113420, 5945978, 1235728, 4867236, 
3520352, 3759364, 1197226, 3193378, + 7479715, 6521319, 7470875, 7561383, 7884926, 1613174, 43260, 522500, + 655327, 3122442, 6348669, 5173371, 3556995, 525098, 768622, 3595838, + 8038120, 8093429, 2437823, 4272102, 4943130, 3342277, 6644538, 8177373, + 5538076, 5688936, 2590150, 7115408, 4325093, 7132797, 5894064, 6784443, + 3767016, 7129923, 5744496, 3548272, 2994039, 6511298, 6476982, 1050970, + 1333058, 7143142, 3318210, 1430225, 451100, 7067962, 5074302, 1962642, + 1279661, 6463336, 2546312, 1374803, 6880252, 7603226, 6144537, 4974386, + 542412, 2831860, 1671176, 1846953, 2584293, 3724270, 7786281, 3776993, + 2013608, 5948022, 5925962, 164721, 6423145, 5011305, 8194886, 1207385, + 3183426, 8217573, 6764025, 5366416, 7570268, 6727783, 3694233, 1799107, + 3038916, 4856520, 4513516, 8110657, 6167306, 975884, 6662682, 7908339, + 426683, 6656817, 1803090, 6470041, 1667432, 1104333, 260646, 3833893, + 2939036, 2235985, 420899, 2286327, 8196974, 976891, 6767575, 3545687, + 554416, 4460757, 48306, 1362209, 4442679, 6979993, 846154, 6403635 +}; + +const SYMCRYPT_MLDSA_INTERNAL_PARAMS SymCryptMlDsaInternalParams44 = +{ + .params = SYMCRYPT_MLDSA_PARAMS_MLDSA44, + .cbPolyElement = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT, + .cbRowVector = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR(4), + .cbColVector = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR(4), + .cbMatrix = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_MATRIX(4, 4), + .nRows = 4, + .nCols = 4, + .privateKeyRange = 2, + .encodedCoefficientBitLength = 3, + .nChallengeNonZeroCoeffs = 39, + .nHintNonZeroCoeffs = 80, + .maskCoefficientRangeLog2 = 17, + .commitmentModulus = 44, + .decomposeR1Factor = 11275, + .commitmentRoundingRange = 95232, + .w1EncodeCoefficientBitLength = 6, // [0, 43] + .cbCommitmentHash = 32, + .cbEncodedPrivateKey = 2560, + .cbEncodedPublicKey = 1312, + .cbEncodedSignature = SYMCRYPT_MLDSA_SIGNATURE_SIZE_MLDSA44 +}; + +const SYMCRYPT_MLDSA_INTERNAL_PARAMS SymCryptMlDsaInternalParams65 = +{ + .params = 
SYMCRYPT_MLDSA_PARAMS_MLDSA65, + .cbPolyElement = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT, + .cbRowVector = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR(6), + .cbColVector = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR(5), + .cbMatrix = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_MATRIX(6, 5), + .nRows = 6, + .nCols = 5, + .privateKeyRange = 4, + .encodedCoefficientBitLength = 4, + .nChallengeNonZeroCoeffs = 49, + .nHintNonZeroCoeffs = 55, + .maskCoefficientRangeLog2 = 19, + .commitmentModulus = 16, + .decomposeR1Factor = 4100, + .commitmentRoundingRange = 261888, + .w1EncodeCoefficientBitLength = 4, // [0, 15] + .cbCommitmentHash = 48, + .cbEncodedPrivateKey = 4032, + .cbEncodedPublicKey = 1952, + .cbEncodedSignature = SYMCRYPT_MLDSA_SIGNATURE_SIZE_MLDSA65 +}; + +const SYMCRYPT_MLDSA_INTERNAL_PARAMS SymCryptMlDsaInternalParams87 = +{ + .params = SYMCRYPT_MLDSA_PARAMS_MLDSA87, + .cbPolyElement = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT, + .cbRowVector = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR(8), + .cbColVector = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR(7), + .cbMatrix = SYMCRYPT_INTERNAL_MLDSA_SIZEOF_MATRIX(8, 7), + .nRows = 8, + .nCols = 7, + .privateKeyRange = 2, + .encodedCoefficientBitLength = 3, + .nChallengeNonZeroCoeffs = 60, + .nHintNonZeroCoeffs = 75, + .maskCoefficientRangeLog2 = 19, + .commitmentModulus = 16, + .decomposeR1Factor = 4100, + .commitmentRoundingRange = 261888, + .w1EncodeCoefficientBitLength = 4, // [0, 15] + .cbCommitmentHash = 64, + .cbEncodedPrivateKey = 4896, + .cbEncodedPublicKey = 2592, + .cbEncodedSignature = SYMCRYPT_MLDSA_SIGNATURE_SIZE_MLDSA87 +}; + +typedef struct _SYMCRYPT_HASH_OID_MAPPING +{ + SYMCRYPT_PQDSA_HASH_ID hashId; + const PCSYMCRYPT_HASH pHashAlgorithm; + PCSYMCRYPT_OID pOid; + BOOLEAN fIsXof; +} SYMCRYPT_HASH_OID_MAPPING, *PSYMCRYPT_HASH_OID_MAPPING; + +// +// Mapping of hash OIDs to SymCrypt hash algorithms. Currently this only contains the "short" hash +// OIDs, and only for those algorithms that are approved for use in ML-DSA. 
In the future, we might +// want to make this functionality more generic, but that requires more thought about the design. +// Ideally, the SYMCRYPT_HASH structures could contain pointers to their corresponding OIDs, but +// those structures are exposed externally, so extending them would be a breaking change. +// +const SYMCRYPT_HASH_OID_MAPPING g_hashOidMap[] = +{ + { SYMCRYPT_PQDSA_HASH_ID_SHA256, &SymCryptSha256Algorithm_default, &SymCryptSha256OidList[1], FALSE }, + { SYMCRYPT_PQDSA_HASH_ID_SHA384, &SymCryptSha384Algorithm_default, &SymCryptSha384OidList[1], FALSE }, + { SYMCRYPT_PQDSA_HASH_ID_SHA512, &SymCryptSha512Algorithm_default, &SymCryptSha512OidList[1], FALSE }, + { SYMCRYPT_PQDSA_HASH_ID_SHA512_256, &SymCryptSha512_256Algorithm_default, &SymCryptSha512_256OidList[1], FALSE }, + { SYMCRYPT_PQDSA_HASH_ID_SHA3_256, &SymCryptSha3_256Algorithm_default, &SymCryptSha3_256OidList[1], FALSE }, + { SYMCRYPT_PQDSA_HASH_ID_SHA3_384, &SymCryptSha3_384Algorithm_default, &SymCryptSha3_384OidList[1], FALSE }, + { SYMCRYPT_PQDSA_HASH_ID_SHA3_512, &SymCryptSha3_512Algorithm_default, &SymCryptSha3_512OidList[1], FALSE }, + { SYMCRYPT_PQDSA_HASH_ID_SHAKE128, &SymCryptShake128HashAlgorithm_default, &SymCryptShake128OidList[1], TRUE }, + { SYMCRYPT_PQDSA_HASH_ID_SHAKE256, &SymCryptShake256HashAlgorithm_default, &SymCryptShake256OidList[1], TRUE } +}; + +// +// The table above relies on the OID lists having (at least) two entries, where the second one +// is the 11-byte encoding of the OID. If this ever changes, the table needs to be updated. 
+// +C_ASSERT( SYMCRYPT_SHA256_OID_COUNT == 2 ); +C_ASSERT( SYMCRYPT_SHA384_OID_COUNT == 2 ); +C_ASSERT( SYMCRYPT_SHA512_OID_COUNT == 2 ); +C_ASSERT( SYMCRYPT_SHA512_256_OID_COUNT == 2 ); +C_ASSERT( SYMCRYPT_SHA3_256_OID_COUNT == 2 ); +C_ASSERT( SYMCRYPT_SHA3_384_OID_COUNT == 2 ); +C_ASSERT( SYMCRYPT_SHA3_512_OID_COUNT == 2 ); +C_ASSERT( SYMCRYPT_SHAKE128_OID_COUNT == 2 ); +C_ASSERT( SYMCRYPT_SHAKE256_OID_COUNT == 2 ); + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaGetInternalParamsFromParams( + SYMCRYPT_MLDSA_PARAMS params, + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS* pInternalParams ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + switch( params ) + { + case SYMCRYPT_MLDSA_PARAMS_MLDSA44: + *pInternalParams = &SymCryptMlDsaInternalParams44; + break; + case SYMCRYPT_MLDSA_PARAMS_MLDSA65: + *pInternalParams = &SymCryptMlDsaInternalParams65; + break; + case SYMCRYPT_MLDSA_PARAMS_MLDSA87: + *pInternalParams = &SymCryptMlDsaInternalParams87; + break; + case SYMCRYPT_MLDSA_PARAMS_NULL: + scError = SYMCRYPT_INCOMPATIBLE_FORMAT; + break; + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + break; + } + + return scError; +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSizeofKeyFormatFromParams( + SYMCRYPT_MLDSA_PARAMS params, + SYMCRYPT_MLDSAKEY_FORMAT mlDsakeyFormat, + SIZE_T* pcbKeyFormat ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pInternalParams = NULL; + + scError = SymCryptMlDsaGetInternalParamsFromParams( params, &pInternalParams ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + switch( mlDsakeyFormat ) + { + case SYMCRYPT_MLDSAKEY_FORMAT_PRIVATE_SEED: + *pcbKeyFormat = SYMCRYPT_MLDSA_ROOT_SEED_SIZE; + break; + case SYMCRYPT_MLDSAKEY_FORMAT_PRIVATE_KEY: + *pcbKeyFormat = pInternalParams->cbEncodedPrivateKey; + break; + case SYMCRYPT_MLDSAKEY_FORMAT_PUBLIC_KEY: + *pcbKeyFormat = pInternalParams->cbEncodedPublicKey; + break; + default: + scError = 
SYMCRYPT_INVALID_ARGUMENT; + break; + } + +cleanup: + return scError; +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSizeofSignatureFromParams( + SYMCRYPT_MLDSA_PARAMS params, + SIZE_T* pcbSignature ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pInternalParams = NULL; + + scError = SymCryptMlDsaGetInternalParamsFromParams( params, &pInternalParams ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + *pcbSignature = pInternalParams->cbEncodedSignature; + +cleanup: + return scError; +} + +_Use_decl_annotations_ +PSYMCRYPT_MLDSAKEY +SYMCRYPT_CALL +SymCryptMlDsakeyInitialize( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pInternalParams, + PBYTE pbKey, + UINT32 cbKey ) +{ + PSYMCRYPT_MLDSAKEY pkMlDsakey = (PSYMCRYPT_MLDSAKEY) pbKey; + SYMCRYPT_ASSERT( pkMlDsakey != NULL ); + + UINT8 nRows = pInternalParams->nRows; + UINT8 nCols = pInternalParams->nCols; + + SYMCRYPT_ASSERT( cbKey == SYMCRYPT_INTERNAL_MLDSA_SIZEOF_KEY(nRows, nCols) ); + + UINT32 cbMatrix = pInternalParams->cbMatrix; // A matrix + UINT32 cbRowVector = pInternalParams->cbRowVector; // s2, t vectors + UINT32 cbColVector = pInternalParams->cbColVector; // s1 vector + + SymCryptWipe( pbKey, cbKey ); + + pkMlDsakey->pParams = pInternalParams; + pkMlDsakey->cbTotalSize = cbKey; + + PBYTE pbCurrent = pbKey + sizeof(SYMCRYPT_MLDSAKEY); + + // Public components + pkMlDsakey->pmA = SymCryptMlDsaMatrixCreate( pbCurrent, cbMatrix, nRows, nCols ); + pbCurrent += cbMatrix; + + pkMlDsakey->pvt1 = SymCryptMlDsaVectorCreate( pbCurrent, cbRowVector, nRows ); + pbCurrent += cbRowVector; + + // Private components + pkMlDsakey->pvs1 = SymCryptMlDsaVectorCreate( pbCurrent, cbColVector, nCols ); + pbCurrent += cbColVector; + + pkMlDsakey->pvs2 = SymCryptMlDsaVectorCreate( pbCurrent, cbRowVector, nRows ); + pbCurrent += cbRowVector; + + pkMlDsakey->pvt0 = SymCryptMlDsaVectorCreate( pbCurrent, cbRowVector, nRows ); + pbCurrent += cbRowVector; + + 
SYMCRYPT_ASSERT( pbCurrent == pbKey + cbKey ); + + SYMCRYPT_SET_MAGIC( pkMlDsakey ); + + return pkMlDsakey; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsakeyComputeT( + PCSYMCRYPT_MLDSA_MATRIX pmA, + PCSYMCRYPT_MLDSA_VECTOR pvs1, + PCSYMCRYPT_MLDSA_VECTOR pvs2, + PSYMCRYPT_MLDSA_VECTOR pvt0, + PSYMCRYPT_MLDSA_VECTOR pvt1, + PSYMCRYPT_MLDSA_VECTOR pvTmp, + PSYMCRYPT_MLDSA_POLYELEMENT peTmp ) +{ + // T = InvNTT(NTT(A)*NTT(s1) + NTT(s2)) + // pvTmp := NTT(A)*NTT(s1) + SymCryptMlDsaMatrixVectorMontMul( + pmA, + pvs1, + pvTmp, + peTmp ); + + // TODO: should probably do the multiplication by R directly in the matrix multiplication function + for(UINT8 i = 0; i < pvTmp->nElems; ++i) + { + SymCryptMlDsaPolyElementMulR(SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT(i, pvTmp)); + } + + // pvTmp := pvTmp + NTT(s2) + SymCryptMlDsaVectorAdd( pvTmp, pvs2, pvTmp ); + + // T = pvTmp := InvNTT(NTT(A)*NTT(s1) + NTT(s2)) + SymCryptMlDsaVectorINTT( pvTmp ); + + SymCryptMlDsaVectorPower2Round( pvTmp, pvt1, pvt0 ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaMontReduce( UINT64 a ) +{ + UINT32 t = ((UINT32) a) * SYMCRYPT_MLDSA_Q_INV; // low 32 bits of a * Q^-1 (UINT32 arithmetic truncates) + UINT32 m = (((UINT64) t) * SYMCRYPT_MLDSA_Q) >> SYMCRYPT_MLDSA_R_LOG2; + + UINT64 res = (a >> SYMCRYPT_MLDSA_R_LOG2) - m; + UINT32 additionMask = SYMCRYPT_MASK32_LT( res, 0 ); + + res = res + (SYMCRYPT_MLDSA_Q & additionMask); // adds Q when the mask is all ones, 0 when it is zero + SYMCRYPT_ASSERT( res < SYMCRYPT_MLDSA_Q ); + + return (UINT32) res; +} + +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaMontMul( UINT32 a, UINT32 b ) +{ + SYMCRYPT_ASSERT( a < SYMCRYPT_MLDSA_Q ); + SYMCRYPT_ASSERT( b < SYMCRYPT_MLDSA_Q ); + + return SymCryptMlDsaMontReduce((UINT64) a * b); +} + +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaModAdd( UINT32 a, UINT32 b ) +{ + SYMCRYPT_ASSERT( a < SYMCRYPT_MLDSA_Q ); + SYMCRYPT_ASSERT( b < SYMCRYPT_MLDSA_Q ); + + UINT32 res = a + b; + UINT32 subtractionMask = SYMCRYPT_MASK32_LT( SYMCRYPT_MLDSA_Q - 1, res ); + + // If res >= Q, subtract Q + res = res - (SYMCRYPT_MLDSA_Q & subtractionMask); + + return
res; +} + +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaModSub( UINT32 a, UINT32 b ) +{ + SYMCRYPT_ASSERT( a < SYMCRYPT_MLDSA_Q ); + SYMCRYPT_ASSERT( b < SYMCRYPT_MLDSA_Q ); + + UINT32 additionMask = SYMCRYPT_MASK32_LT( a, b ); + + // If a < b, result is negative, so we add Q + return (INT32) a - (INT32) b + (SYMCRYPT_MLDSA_Q & additionMask); +} + +_Use_decl_annotations_ +PSYMCRYPT_MLDSA_POLYELEMENT +SYMCRYPT_CALL +SymCryptMlDsaPolyElementCreate( + PBYTE pbBuffer, + SIZE_T cbBuffer ) +{ + UNREFERENCED_PARAMETER( cbBuffer ); + SYMCRYPT_ASSERT( cbBuffer == SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT ); + + PSYMCRYPT_MLDSA_POLYELEMENT peElement = (PSYMCRYPT_MLDSA_POLYELEMENT) pbBuffer; + SYMCRYPT_ASSERT( peElement != NULL ); + + return peElement; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementSetZero( + PSYMCRYPT_MLDSA_POLYELEMENT peDst ) +{ + for( UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; ++i ) + { + peDst->coeffs[i] = 0; + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementNTT( + PSYMCRYPT_MLDSA_POLYELEMENT peSrc ) +{ + C_ASSERT( (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS & 1) == 0); + + UINT32 k = 0; + + for(UINT32 len = SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 2; len >= 1; len /= 2) + { + for(UINT32 start = 0; start < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; start += 2 * len) + { + k++; + UINT32 twiddleFactor = MLDSA_ZETA_BITREV_TIMES_R[k]; + + for(UINT32 j = start; j < start + len; j++) + { + // + // Typically for Montgomery multiplication, both operands have a factor of R. + // After multiplying, the product has a factor of R^2, and a reduction is then + // performed which divides out a factor of R, resulting in the product again having + // a factor of R^1, mod Q. In this case, the twiddleFactor is pre-multiplied by R, + // but the coefficients are not expected to have a factor of R; thus, after + // reduction, the result does not have a factor of R either.
+ // + UINT32 t = SymCryptMlDsaMontMul(twiddleFactor, peSrc->coeffs[j + len]); + peSrc->coeffs[j + len] = SymCryptMlDsaModSub(peSrc->coeffs[j], t); + peSrc->coeffs[j] = SymCryptMlDsaModAdd(peSrc->coeffs[j], t); + } + } + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementINTT( + PSYMCRYPT_MLDSA_POLYELEMENT peSrc ) +{ + C_ASSERT( (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS & 1) == 0); + + UINT32 k = SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; + + for(UINT32 len = 1; len < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; len *= 2) + { + for(UINT32 start = 0; start < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; start += 2 * len) + { + k--; + UINT32 twiddleFactor = MLDSA_NEGATIVE_ZETA_BITREV_TIMES_R[k]; + + for(UINT32 j = start; j < start + len; j++) + { + // + // As above, our twiddleFactor is pre-multiplied by R, but the coefficients are not, + // so after the reduction, the result does not have a factor of R. + // + UINT32 t = peSrc->coeffs[j]; + peSrc->coeffs[j] = SymCryptMlDsaModAdd(t, peSrc->coeffs[j + len]); + peSrc->coeffs[j + len] = SymCryptMlDsaModSub(t, peSrc->coeffs[j + len]); + peSrc->coeffs[j + len] = SymCryptMlDsaMontMul(twiddleFactor, peSrc->coeffs[j + len]); + } + } + } + + for(UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++) + { + peSrc->coeffs[i] = SymCryptMlDsaMontMul(SYMCRYPT_MLDSA_INTT_FIXUP_TIMES_R, peSrc->coeffs[i]); + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementMulR( + PSYMCRYPT_MLDSA_POLYELEMENT peSrc ) +{ + for(UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++) + { + peSrc->coeffs[i] = SymCryptMlDsaMontMul(SYMCRYPT_MLDSA_RSQR, peSrc->coeffs[i]); + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementMontMul( + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc1, + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc2, + PSYMCRYPT_MLDSA_POLYELEMENT peDst ) +{ + for(UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++) + { + peDst->coeffs[i] = SymCryptMlDsaMontMul(peSrc1->coeffs[i], 
peSrc2->coeffs[i]); + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementAdd( + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc1, + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc2, + PSYMCRYPT_MLDSA_POLYELEMENT peDst ) +{ + for(UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++) + { + peDst->coeffs[i] = SymCryptMlDsaModAdd(peSrc1->coeffs[i], peSrc2->coeffs[i]); + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementSub( + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc1, + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc2, + PSYMCRYPT_MLDSA_POLYELEMENT peDst ) +{ + for(UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++) + { + peDst->coeffs[i] = SymCryptMlDsaModSub(peSrc1->coeffs[i], peSrc2->coeffs[i]); + } +} + +_Use_decl_annotations_ +PSYMCRYPT_MLDSA_VECTOR +SYMCRYPT_CALL +SymCryptMlDsaVectorCreate( + PBYTE pbBuffer, + UINT32 cbBuffer, + UINT8 nElems ) +{ + SYMCRYPT_ASSERT( nElems > 0); + SYMCRYPT_ASSERT( nElems <= SYMCRYPT_MLDSA_VECTOR_MAX_LENGTH ); + SYMCRYPT_ASSERT( cbBuffer == SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR(nElems) ); + + PSYMCRYPT_MLDSA_VECTOR pvVector = (PSYMCRYPT_MLDSA_VECTOR) pbBuffer; + SYMCRYPT_ASSERT( pvVector != NULL ); + + pvVector->nElems = nElems; + pvVector->cbTotalSize = cbBuffer; + + PBYTE pbCurrent = pbBuffer + sizeof(SYMCRYPT_MLDSA_VECTOR); + for( UINT32 i = 0; i < nElems; ++i ) + { + SymCryptMlDsaPolyElementCreate( pbCurrent, SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT ); + pbCurrent += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT; + } + + return pvVector; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorCopy( + PCSYMCRYPT_MLDSA_VECTOR pvSrc, + PSYMCRYPT_MLDSA_VECTOR pvDst ) +{ + SYMCRYPT_ASSERT( pvSrc->nElems == pvDst->nElems ); + + memcpy( pvDst, pvSrc, pvSrc->cbTotalSize ); +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorSetZero( + PSYMCRYPT_MLDSA_VECTOR pvDst ) +{ + for( UINT32 i = 0; i < pvDst->nElems; ++i ) + { + PSYMCRYPT_MLDSA_POLYELEMENT peDst = 
SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst ); + SymCryptMlDsaPolyElementSetZero( peDst ); + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorAdd( + PCSYMCRYPT_MLDSA_VECTOR pvSrc1, + PCSYMCRYPT_MLDSA_VECTOR pvSrc2, + PSYMCRYPT_MLDSA_VECTOR pvDst ) +{ + SYMCRYPT_ASSERT( pvSrc1->nElems == pvSrc2->nElems ); + SYMCRYPT_ASSERT( pvSrc1->nElems == pvDst->nElems ); + + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc1, peSrc2; + PSYMCRYPT_MLDSA_POLYELEMENT peDst; + + for( UINT32 i = 0; i < pvSrc1->nElems; ++i ) + { + peSrc1 = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc1 ); + peSrc2 = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc2 ); + peDst = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst ); + + SymCryptMlDsaPolyElementAdd( peSrc1, peSrc2, peDst ); + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorSub( + PCSYMCRYPT_MLDSA_VECTOR pvSrc1, + PCSYMCRYPT_MLDSA_VECTOR pvSrc2, + PSYMCRYPT_MLDSA_VECTOR pvDst ) +{ + SYMCRYPT_ASSERT( pvSrc1->nElems == pvSrc2->nElems ); + SYMCRYPT_ASSERT( pvSrc1->nElems == pvDst->nElems ); + + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc1, peSrc2; + PSYMCRYPT_MLDSA_POLYELEMENT peDst; + + for( UINT32 i = 0; i < pvSrc1->nElems; ++i ) + { + peSrc1 = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc1 ); + peSrc2 = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc2 ); + peDst = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst ); + + SymCryptMlDsaPolyElementSub( peSrc1, peSrc2, peDst ); + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorPolyElementMontMul( + PCSYMCRYPT_MLDSA_VECTOR pvSrc1, + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc2, + PSYMCRYPT_MLDSA_VECTOR pvDst ) +{ + SYMCRYPT_ASSERT( pvSrc1->nElems == pvDst->nElems ); + + for( UINT32 i = 0; i < pvSrc1->nElems; ++i ) + { + PSYMCRYPT_MLDSA_POLYELEMENT peSrc1 = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc1 ); + PSYMCRYPT_MLDSA_POLYELEMENT peDst = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst ); + + SymCryptMlDsaPolyElementMontMul( peSrc1, peSrc2, 
peDst ); + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorNTT( + PSYMCRYPT_MLDSA_VECTOR pvSrc ) +{ + for( UINT32 i = 0; i < pvSrc->nElems; ++i ) + { + SymCryptMlDsaPolyElementNTT( SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc ) ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorINTT( + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvSrc ) +{ + for( UINT32 i = 0; i < pvSrc->nElems; ++i ) + { + SymCryptMlDsaPolyElementINTT( SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc ) ); + } +} + +_Use_decl_annotations_ +PSYMCRYPT_MLDSA_MATRIX +SYMCRYPT_CALL +SymCryptMlDsaMatrixCreate( + PBYTE pbBuffer, + UINT32 cbBuffer, + UINT8 nRows, + UINT8 nCols ) +{ + SYMCRYPT_ASSERT( nRows > 0); + SYMCRYPT_ASSERT( nCols > 0); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLDSA_MATRIX_MAX_NROWS ); + SYMCRYPT_ASSERT( nCols <= SYMCRYPT_MLDSA_MATRIX_MAX_NCOLS ); + SYMCRYPT_ASSERT( cbBuffer == SYMCRYPT_INTERNAL_MLDSA_SIZEOF_MATRIX(nRows, nCols) ); + + PSYMCRYPT_MLDSA_MATRIX pMatrix = (PSYMCRYPT_MLDSA_MATRIX) pbBuffer; + SYMCRYPT_ASSERT( pMatrix != NULL ); + + pMatrix->nRows = nRows; + pMatrix->nCols = nCols; + pMatrix->cbTotalSize = cbBuffer; + + PBYTE pbCurrent = pbBuffer + sizeof(SYMCRYPT_MLDSA_MATRIX); + for(UINT32 i = 0; i < (UINT32) nRows * nCols; ++i) + { + SymCryptMlDsaPolyElementCreate( pbCurrent, SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT ); + pbCurrent += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT; + } + + return pMatrix; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaMatrixVectorMontMul( + PCSYMCRYPT_MLDSA_MATRIX pmSrc1, + PCSYMCRYPT_MLDSA_VECTOR pvSrc2, + PSYMCRYPT_MLDSA_VECTOR pvDst, + PSYMCRYPT_MLDSA_POLYELEMENT peTmp) +{ + SYMCRYPT_ASSERT( pmSrc1->nCols == pvSrc2->nElems ); + SYMCRYPT_ASSERT( pmSrc1->nRows == pvDst->nElems ); + + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc1, peSrc2; + PSYMCRYPT_MLDSA_POLYELEMENT peDst; + + SymCryptMlDsaVectorSetZero( pvDst ); + + _Analysis_assume_( pmSrc1->nRows > 0 ); + _Analysis_assume_( pmSrc1->nCols > 0 ); + + for( UINT32 i = 
0; i < pmSrc1->nRows; ++i ) + { + // peDst = pvDst[i] + peDst = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst ); + + for( UINT32 j = 0; j < pmSrc1->nCols; ++j ) + { + peSrc1 = SYMCRYPT_INTERNAL_MLDSA_MATRIX_ELEMENT( i, j, pmSrc1 ); + peSrc2 = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( j, pvSrc2 ); + + SymCryptMlDsaPolyElementMontMul( peSrc1, peSrc2, peTmp ); + SymCryptMlDsaPolyElementAdd( peDst, peTmp, peDst ); + } + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaRejNttPoly( + PCBYTE pbRejNttPolySeed, + SIZE_T cbRejNttPolySeed, + PSYMCRYPT_MLDSA_POLYELEMENT peDst ) +{ + SYMCRYPT_ASSERT( cbRejNttPolySeed == SYMCRYPT_MLDSA_REJNTTPOLY_SEED_SIZE ); + + SYMCRYPT_SHAKE128_STATE shakeState; + SymCryptShake128Init( &shakeState ); + SymCryptShake128Append( &shakeState, pbRejNttPolySeed, cbRejNttPolySeed ); + + UINT32 coeff = 0; + BYTE shakeBytes[4]; // We only use 3 bytes, but using 4 allows converting to UINT32 more easily + + SymCryptWipeKnownSize( shakeBytes, sizeof(shakeBytes) ); + + for(UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++) + { + // CoeffFromThreeBytes from FIPS 204 + do + { + SymCryptShake128Extract( &shakeState, shakeBytes, 3, FALSE ); + shakeBytes[2] &= 0x7F; // if b2 > 127, b2 -= 128 + coeff = SYMCRYPT_LOAD_LSBFIRST32( shakeBytes ); + } while (coeff >= SYMCRYPT_MLDSA_Q); + + peDst->coeffs[i] = coeff; + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaExpandA( + PCBYTE pbPublicSeed, + SIZE_T cbPublicSeed, + PSYMCRYPT_MLDSA_MATRIX pmA ) +{ + SYMCRYPT_ASSERT( cbPublicSeed == SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE ); + C_ASSERT( SYMCRYPT_MLDSA_REJNTTPOLY_SEED_SIZE == SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE + 2 ); + + // The expanded seed is the public seed concatenated with one byte each for the column and row + // indices of the matrix element being expanded. 
+ BYTE rejNttSeed[SYMCRYPT_MLDSA_REJNTTPOLY_SEED_SIZE]; + memcpy( rejNttSeed, pbPublicSeed, cbPublicSeed ); + + for( UINT8 i = 0; i < pmA->nRows; ++i ) + { + for( UINT8 j = 0; j < pmA->nCols; ++j ) + { + rejNttSeed[SYMCRYPT_MLDSA_REJNTTPOLY_SEED_SIZE - 2] = j; + rejNttSeed[SYMCRYPT_MLDSA_REJNTTPOLY_SEED_SIZE - 1] = i; + + #pragma prefast( suppress: 6385, "False warning - reading invalid data from rejNttSeed" ); + SymCryptMlDsaRejNttPoly( rejNttSeed, sizeof(rejNttSeed), SYMCRYPT_INTERNAL_MLDSA_MATRIX_ELEMENT( i, j, pmA ) ); + + } + } +} + +_Use_decl_annotations_ +FORCEINLINE +INT8 +SYMCRYPT_CALL +SymCryptMlDsaCoeffFromHalfByte( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + UINT8 halfByte ) +{ + SYMCRYPT_ASSERT( halfByte <= 15 ); + SYMCRYPT_ASSERT( pParams->privateKeyRange == 2 || pParams->privateKeyRange == 4 ); + + if( pParams->privateKeyRange == 2 && halfByte < 15) + { + UINT8 halfByteDiv5 = (UINT8) ( ( halfByte * 13 ) >> 6 ); + UINT8 halfByteMod5 = halfByte - (5 * halfByteDiv5); + return 2 - halfByteMod5; + } + else if( pParams->privateKeyRange == 4 && halfByte < 9 ) + { + return 4 - halfByte; + } + + return INT8_MIN; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaRejBoundedPoly( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + PCBYTE pbRejBoundedPolySeed, + SIZE_T cbRejBoundedPolySeed, + PSYMCRYPT_MLDSA_POLYELEMENT peDst ) +{ + SYMCRYPT_ASSERT( cbRejBoundedPolySeed == SYMCRYPT_MLDSA_REJBOUNDEDPOLY_SEED_SIZE ); + + SYMCRYPT_SHAKE256_STATE shakeState; + SymCryptShake256Init( &shakeState ); + SymCryptShake256Append( &shakeState, pbRejBoundedPolySeed, cbRejBoundedPolySeed ); + + BYTE shakeByte; + UINT32 i = 0; + INT8 z0, z1; + + do + { + // Note on sidechannel safety: the rejection sampling here can leak which bytes of the SHAKE + // output are used and which are rejected. However, bytes themselves are not leaked. 
This + // may allow the attacker to more quickly eliminate incorrect seed values when doing an + // exhaustive search, but given the size of the seed this should still not make the + // exhaustive search computationally feasible. + SymCryptShake256Extract( &shakeState, &shakeByte, sizeof(shakeByte), FALSE ); + z0 = SymCryptMlDsaCoeffFromHalfByte( pParams, shakeByte & 0x0F ); + z1 = SymCryptMlDsaCoeffFromHalfByte( pParams, shakeByte >> 4 ); + + SYMCRYPT_ASSERT( z0 == INT8_MIN || (( z0 + pParams->privateKeyRange >= 0 ) && ( z0 + pParams->privateKeyRange <= 2 * pParams->privateKeyRange )) ); + SYMCRYPT_ASSERT( z1 == INT8_MIN || (( z1 + pParams->privateKeyRange >= 0 ) && ( z1 + pParams->privateKeyRange <= 2 * pParams->privateKeyRange )) ); + + if(z0 != INT8_MIN) + { + peDst->coeffs[i] = SymCryptMlDsaSignedCoefficientModQ( z0 ); + i++; + } + + if(z1 != INT8_MIN && i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS) + { + peDst->coeffs[i] = SymCryptMlDsaSignedCoefficientModQ( z1 ); + i++; + } + + } while( i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS ); + + SymCryptWipeKnownSize( &shakeState, sizeof(shakeState) ); +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaExpandS( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + PCBYTE pbPrivateVectorSeed, + SIZE_T cbPrivateVectorSeed, + PSYMCRYPT_MLDSA_VECTOR pvs1, + PSYMCRYPT_MLDSA_VECTOR pvs2 ) +{ + SYMCRYPT_ASSERT( cbPrivateVectorSeed == SYMCRYPT_MLDSA_PRIVATE_VECTOR_SEED_SIZE ); + C_ASSERT( SYMCRYPT_MLDSA_REJBOUNDEDPOLY_SEED_SIZE == SYMCRYPT_MLDSA_PRIVATE_VECTOR_SEED_SIZE + 2 ); + + UINT32 nRows = pParams->nRows; + UINT32 nCols = pParams->nCols; + + // The expanded seed is the private vector seed concatenated with the (two-byte) row/column + // index of the vector element being expanded. 
+ BYTE rejBoundedPolySeed[SYMCRYPT_MLDSA_REJBOUNDEDPOLY_SEED_SIZE]; + memcpy( rejBoundedPolySeed, pbPrivateVectorSeed, cbPrivateVectorSeed ); + + for(UINT16 i = 0; i < nCols; ++i) + { + SYMCRYPT_STORE_LSBFIRST16( rejBoundedPolySeed + SYMCRYPT_MLDSA_REJBOUNDEDPOLY_SEED_SIZE - sizeof(UINT16), i ); + SymCryptMlDsaRejBoundedPoly( pParams, rejBoundedPolySeed, sizeof(rejBoundedPolySeed), + SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvs1 ) ); + } + + for(UINT16 i = 0; i < nRows; ++i) + { + SYMCRYPT_STORE_LSBFIRST16( rejBoundedPolySeed + SYMCRYPT_MLDSA_REJBOUNDEDPOLY_SEED_SIZE - sizeof(UINT16), (UINT16) nCols + i ); + #pragma prefast( suppress: 6385, "False warning - reading invalid data from rejBoundedPolySeed" ); // Doesn't trigger in previous loop for some reason + SymCryptMlDsaRejBoundedPoly( pParams, rejBoundedPolySeed, sizeof(rejBoundedPolySeed), + SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvs2 ) ); + } + + SymCryptWipeKnownSize( rejBoundedPolySeed, sizeof(rejBoundedPolySeed) ); +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaSampleInBall( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + PCBYTE pbCommitmentHash, + SIZE_T cbCommitmentHash, + PSYMCRYPT_MLDSA_POLYELEMENT peChallenge ) +{ + SymCryptMlDsaPolyElementSetZero( peChallenge ); + + SYMCRYPT_SHAKE256_STATE shakeState; + SymCryptShake256Init( &shakeState ); + SymCryptShake256Append( &shakeState, pbCommitmentHash, cbCommitmentHash ); + + // The first 8 bytes are used as as powers of negative one when sampling the challenge + // polynomial: c[j] = -1^(H(rho)[i + tau - 256]) + BYTE temp[8]; + SYMCRYPT_ASSERT( pParams->nChallengeNonZeroCoeffs <= 8 * sizeof(temp) ); + SymCryptShake256Extract( &shakeState, temp, sizeof(temp), FALSE ); + + UINT64 powersOfNegativeOne = SYMCRYPT_LOAD_LSBFIRST64( temp ); + + for(UINT32 i = SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS - pParams->nChallengeNonZeroCoeffs; + i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; + ++i) + { + BYTE j = 0; + do + { + SymCryptShake256Extract( 
&shakeState, &j, sizeof(j), FALSE ); + } while( j > i ); + + peChallenge->coeffs[i] = peChallenge->coeffs[j]; + + UINT32 negativeMask = SYMCRYPT_MASK32_NONZERO( powersOfNegativeOne & 1 ); + powersOfNegativeOne >>= 1; + + // Set the coefficient modulo Q + peChallenge->coeffs[j] = ((SYMCRYPT_MLDSA_Q - 1) & negativeMask) | (1 & ~negativeMask); + } + + SymCryptWipeKnownSize( &shakeState, sizeof(shakeState) ); +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaExpandMask( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + PSYMCRYPT_SHAKE256_STATE pShakeState, + PCBYTE pbPrivateRandom, + SIZE_T cbPrivateRandom, + UINT16 counter, + PSYMCRYPT_MLDSA_VECTOR pvMask ) +{ + SYMCRYPT_ASSERT( pParams->nCols == pvMask->nElems ); + SYMCRYPT_ASSERT( cbPrivateRandom == SYMCRYPT_SHAKE256_RESULT_SIZE ); + + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + BYTE seedSuffix[2]; + + UINT32 cbShakeOutput = (pParams->maskCoefficientRangeLog2 + 1) * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + BYTE shakeOutput[20 * 32]; // Maximum size of the SHAKE output + SYMCRYPT_ASSERT( cbShakeOutput <= sizeof(shakeOutput) ); + + for(UINT16 i = 0; i < pvMask->nElems; ++i) + { + SYMCRYPT_STORE_LSBFIRST16( seedSuffix, counter + i ); + SymCryptShake256Append( pShakeState, pbPrivateRandom, cbPrivateRandom ); + SymCryptShake256Append( pShakeState, (PBYTE) &seedSuffix, sizeof(seedSuffix) ); + SymCryptShake256Extract( pShakeState, shakeOutput, cbShakeOutput, TRUE ); + + scError = SymCryptMlDsaPolyElementDecode( + shakeOutput, + pParams->maskCoefficientRangeLog2 + 1, + 1 << pParams->maskCoefficientRangeLog2, + SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvMask ) ); + SYMCRYPT_ASSERT( scError == SYMCRYPT_NO_ERROR ); + } + + SymCryptMlDsaVectorNTT( pvMask ); +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaMakeHint( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + PSYMCRYPT_MLDSA_VECTOR pvWMinusCs2, + PSYMCRYPT_MLDSA_VECTOR pvWMinusCs2PlusCt0, + PSYMCRYPT_MLDSA_VECTOR pvDst, + UINT32* nBitsSet ) 
//
// SymCryptMlDsaUseHint
//
// Applies a hint vector to a commitment vector, in place. Each coefficient of
// pvCommitment is decomposed with SymCryptMlDsaDecompose into (r1, r0) and
// replaced by r1, (r1 + 1) mod commitmentModulus, or (r1 - 1) mod
// commitmentModulus depending on the matching hint coefficient and on r0.
//
//  pParams         Parameter set (supplies commitmentModulus and
//                  commitmentRoundingRange).
//  pvHint          Hint vector; every coefficient must be 0 or 1.
//  pvCommitment    Input/output vector; must have the same element count as pvHint.
//
_Use_decl_annotations_
VOID
SYMCRYPT_CALL
SymCryptMlDsaUseHint(
    PCSYMCRYPT_MLDSA_INTERNAL_PARAMS    pParams,
    PCSYMCRYPT_MLDSA_VECTOR             pvHint,
    PSYMCRYPT_MLDSA_VECTOR              pvCommitment )
{
    SYMCRYPT_ASSERT( pvHint->nElems == pvCommitment->nElems );

    UINT32 r1, r0;
    UINT32 hintIsZeroMask;
    UINT32 tmpMask;
    UINT32 r0PrimeGtZeroMask;
    UINT32 positiveOffsetCoeff;
    UINT32 negativeOffsetCoeff;

    for(UINT32 i = 0; i < pvHint->nElems; ++i)
    {
        PSYMCRYPT_MLDSA_POLYELEMENT peHint = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvHint );
        PSYMCRYPT_MLDSA_POLYELEMENT peCommitment = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvCommitment );

        for(UINT32 j = 0; j < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; ++j)
        {
            SYMCRYPT_ASSERT( peHint->coeffs[j] == 0 || peHint->coeffs[j] == 1 );

            SymCryptMlDsaDecompose( pParams, peCommitment->coeffs[j], &r1, &r0 );

            //
            // FIPS-204 UseHint lines 3-5
            // r1 in range [0, commitmentModulus)
            // r0 in range [-commitmentRoundingRange, commitmentRoundingRange], encoded
            //   as an unsigned integer modulo Q
            //
            // Let r0' := r0 mod+- 2*gamma_2
            // (This is just r0 in FIPS 204, which uses signed integers.)
            //
            // r0 - 1 <  commitmentRoundingRange => r0' > 0
            // r0 - 1 >= commitmentRoundingRange => r0' <= 0
            //
            // There are three cases to consider:
            // 1. If the hint is zero, the coefficient is set to r1.
            // 2. Else if r0' > 0, the coefficient is (r1 + 1) mod commitmentModulus.
            // 3. Else (r0' <= 0), the coefficient is (r1 - 1) mod commitmentModulus.
            //
            // The hint is public so we don't have to implement this in a sidechannel-safe manner,
            // but avoiding branches will improve performance.
            //

            // Set up masks to determine which case we fall into
            hintIsZeroMask = peHint->coeffs[j] - 1; // 0 if hint is 1, 0xFFFFFFFF if hint is 0
            // r0 - 1 is computed in unsigned arithmetic, so r0 == 0 wraps to 0xFFFFFFFF.
            // NOTE(review): this relies on SYMCRYPT_MASK32_LT reporting "not less than" for
            // that wrapped value - confirm against the macro's documented input range.
            r0PrimeGtZeroMask = SYMCRYPT_MASK32_LT( r0 - 1, pParams->commitmentRoundingRange );

            // Case 2: r0' > 0, so the coefficient is (r1 + 1) mod commitmentModulus,
            // i.e. (r1 + 1) if r1 != commitmentModulus - 1, else 0
            tmpMask = ~SYMCRYPT_MASK32_EQ( r1, (UINT32) (pParams->commitmentModulus - 1) );
            positiveOffsetCoeff = tmpMask & (r1 + 1);

            // Case 3: r0' <= 0, so the coefficient is (r1 - 1) mod commitmentModulus,
            // i.e. (r1 - 1) if r1 != 0, else commitmentModulus - 1
            tmpMask = SYMCRYPT_MASK32_EQ( r1, 0 );
            negativeOffsetCoeff = ( tmpMask & (pParams->commitmentModulus - 1) ) | ( ~tmpMask & ( r1 - 1 ) );

            // Mask out each possible coefficient based on which case we fall into.
            // Exactly one of the three selector masks is all-ones, so at most one
            // candidate survives.
            r1 &= hintIsZeroMask;
            positiveOffsetCoeff &= ~hintIsZeroMask & r0PrimeGtZeroMask;
            negativeOffsetCoeff &= ~hintIsZeroMask & ~r0PrimeGtZeroMask;

            // Sanity check: no combination of masked values should be set simultaneously
            SYMCRYPT_ASSERT( (r1 & positiveOffsetCoeff) == 0 );
            SYMCRYPT_ASSERT( (r1 & negativeOffsetCoeff) == 0 );
            SYMCRYPT_ASSERT( (positiveOffsetCoeff & negativeOffsetCoeff) == 0 );

            // Finally, we can pick the correct coefficient using our masks
            peCommitment->coeffs[j] = r1 | positiveOffsetCoeff | negativeOffsetCoeff;
        }
    }
}
//
// SymCryptMlDsaPkDecode
//
// Imports an encoded public key (public seed followed by the 10-bit packed t1
// vector) into pkMlDsakey.
//
//  pbSrc, cbSrc    Encoded public key; cbSrc must equal the parameter set's
//                  cbEncodedPublicKey.
//  flags           Unused.
//  pkMlDsakey      Key object to populate. Any root seed or private key
//                  material already held by the object is wiped first.
//
// On success the key holds the public seed, t1 in NTT form, the expanded
// matrix A, and the public key hash (SHAKE256 over the seed and the encoded
// polynomials).
//
// Returns:
//  SYMCRYPT_WRONG_KEY_SIZE              cbSrc does not match the parameter set
//  SYMCRYPT_MEMORY_ALLOCATION_FAILURE   temporaries could not be allocated
//  any error from SymCryptMlDsaPolyElementDecode
//  SYMCRYPT_NO_ERROR                    otherwise
//
// Note: when decoding fails part-way through, the public seed and the
// polynomials decoded so far have already been written to the key object.
//
_Use_decl_annotations_
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptMlDsaPkDecode(
    PCBYTE              pbSrc,
    SIZE_T              cbSrc,
    UINT32              flags,
    PSYMCRYPT_MLDSAKEY  pkMlDsakey )
{
    UNREFERENCED_PARAMETER( flags );

    // Size of one encoded polynomial from t1: 256 coefficients * 10 bits per coefficient / 8 bits per byte
    const UINT32 cbEncodedPoly = SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS * 10 / 8;

    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
    PCBYTE pbCurr = pbSrc;
    PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemps = NULL;
    PSYMCRYPT_SHAKE256_STATE pShakeState = NULL;

    if( cbSrc != pkMlDsakey->pParams->cbEncodedPublicKey )
    {
        scError = SYMCRYPT_WRONG_KEY_SIZE;
        goto cleanup;
    }

    // Allocate space for an encoded polynomial so we can copy the input, decode it, and append it
    // to our SHAKE state. We copy it to a local buffer so we don't violate the read-once rule when
    // appending to the SHAKE state.
    pTemps = SymCryptMlDsaTemporariesAllocateAndInitialize( pkMlDsakey->pParams, 0, 0, 0, cbEncodedPoly );
    if( pTemps == NULL )
    {
        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
        goto cleanup;
    }

    // Reset the private key state in case this key object is being reused
    if( pkMlDsakey->hasRootSeed )
    {
        SymCryptWipeKnownSize( pkMlDsakey->rootSeed, SYMCRYPT_MLDSA_ROOT_SEED_SIZE );
        pkMlDsakey->hasRootSeed = FALSE;
    }

    if( pkMlDsakey->hasPrivateKey )
    {
        SymCryptWipeKnownSize( pkMlDsakey->privateSigningSeed, SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE );
        SymCryptMlDsaVectorSetZero( pkMlDsakey->pvs1 );
        SymCryptMlDsaVectorSetZero( pkMlDsakey->pvs2 );
        SymCryptMlDsaVectorSetZero( pkMlDsakey->pvt0 );
        pkMlDsakey->hasPrivateKey = FALSE;
    }

    // The encoding starts with the public seed
    memcpy( pkMlDsakey->publicSeed, pbCurr, SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE );
    pbCurr += SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE;

    // The public key hash is computed incrementally over the same bytes that are decoded:
    // first the seed, then each encoded polynomial as it is processed.
    pShakeState = &(pTemps->shake256State);

    SymCryptShake256Init( pShakeState );
    SymCryptShake256Append( pShakeState, pkMlDsakey->publicSeed, SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE );

    for( UINT32 i = 0; i < pkMlDsakey->pvt1->nElems; ++i )
    {
        // Single read of the caller's buffer; decode and hash both use the scratch copy
        memcpy( pTemps->pbScratch, pbCurr, cbEncodedPoly );

        PSYMCRYPT_MLDSA_POLYELEMENT peElement = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pkMlDsakey->pvt1 );

        scError = SymCryptMlDsaPolyElementDecode(
            pTemps->pbScratch,
            10,
            0,
            peElement );
        if( scError != SYMCRYPT_NO_ERROR )
        {
            goto cleanup;
        }

        SymCryptShake256Append( pShakeState, pTemps->pbScratch, cbEncodedPoly );

        pbCurr += cbEncodedPoly;
    }

    SYMCRYPT_ASSERT( pbCurr == pbSrc + cbSrc );

    // t1 is kept in NTT form inside the key object
    SymCryptMlDsaVectorNTT( pkMlDsakey->pvt1 );

    SymCryptMlDsaExpandA(
        pkMlDsakey->publicSeed,
        SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE,
        pkMlDsakey->pmA );

    SymCryptShake256Result( pShakeState, pkMlDsakey->publicKeyHash );

cleanup:
    if( pTemps != NULL )
    {
        SymCryptMlDsaTemporariesFree( pTemps );
    }

    return scError;
}
+SymCryptMlDsaSkEncode( + PCSYMCRYPT_MLDSAKEY pkMlDsakey, + PBYTE pbDst, + SIZE_T cbDst ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams = pkMlDsakey->pParams; + UINT32 cbEncodedKey = pParams->cbEncodedPrivateKey; + PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemps = NULL; + PBYTE pbCurr = pbDst; + + if( !pkMlDsakey->hasPrivateKey ) + { + scError = SYMCRYPT_INCOMPATIBLE_FORMAT; + goto cleanup; + } + + if( cbDst != cbEncodedKey ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pTemps = SymCryptMlDsaTemporariesAllocateAndInitialize( pParams, 1, 1, 0, 0 ); + if( pTemps == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + memcpy( pbCurr, pkMlDsakey->publicSeed, SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE ); + pbCurr += SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE; + + memcpy( pbCurr, pkMlDsakey->privateSigningSeed, SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE ); + pbCurr += SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE; + + memcpy( pbCurr, pkMlDsakey->publicKeyHash, SYMCRYPT_MLDSA_PUBLIC_KEY_HASH_SIZE ); + pbCurr += SYMCRYPT_MLDSA_PUBLIC_KEY_HASH_SIZE; + + { + // Inverse NTT and encode s1 + PSYMCRYPT_MLDSA_VECTOR pvs1InvNTT = pTemps->pvColVectors[0]; + SymCryptMlDsaVectorCopy( pkMlDsakey->pvs1, pvs1InvNTT ); + SymCryptMlDsaVectorINTT( pvs1InvNTT ); + + SymCryptMlDsaVectorEncode( + pvs1InvNTT, + pParams->encodedCoefficientBitLength, + pParams->privateKeyRange, + pbCurr ); + pbCurr += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( pvs1InvNTT, pParams->encodedCoefficientBitLength ); + } + + { + // Inverse NTT and encode s2 + PSYMCRYPT_MLDSA_VECTOR pvs2InvNTT = pTemps->pvRowVectors[0]; + SymCryptMlDsaVectorCopy( pkMlDsakey->pvs2, pvs2InvNTT ); + SymCryptMlDsaVectorINTT( pvs2InvNTT ); + + SymCryptMlDsaVectorEncode( + pvs2InvNTT, + pParams->encodedCoefficientBitLength, + pParams->privateKeyRange, + pbCurr ); + pbCurr += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( pvs2InvNTT, 
pParams->encodedCoefficientBitLength ); + } + + { + // Inverse NTT and encode t0 + // Can re-use the previous temporary row vector as it's no longer needed + PSYMCRYPT_MLDSA_VECTOR pvt0InvNTT = pTemps->pvRowVectors[0]; + SymCryptMlDsaVectorCopy( pkMlDsakey->pvt0, pvt0InvNTT ); + SymCryptMlDsaVectorINTT( pvt0InvNTT ); + + SymCryptMlDsaVectorEncode( + pvt0InvNTT, + SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS, + 1 << (SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS - 1), + pbCurr ); + pbCurr += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( pvt0InvNTT, SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS ); + } + + SYMCRYPT_ASSERT( pbCurr == pbDst + cbDst ); + +cleanup: + if( pTemps != NULL) + { + SymCryptMlDsaTemporariesFree( pTemps ); + } + + return scError; +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSkDecode( + PCBYTE pbSrc, + SIZE_T cbSrc, + UINT32 flags, + PSYMCRYPT_MLDSAKEY pkMlDsakey ) +{ + UNREFERENCED_PARAMETER( flags ); + + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams = pkMlDsakey->pParams; + PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemps = NULL; + PCBYTE pbCurr = pbSrc; + BYTE pubKeyHashTmp[SYMCRYPT_MLDSA_PUBLIC_KEY_HASH_SIZE]; + + if( cbSrc != pkMlDsakey->pParams->cbEncodedPrivateKey ) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + pTemps = SymCryptMlDsaTemporariesAllocateAndInitialize( + pParams, + 2, // row vectors - t, t0 + 0, // column vectors + 1, // poly elements - temporary space + pkMlDsakey->pParams->cbEncodedPublicKey ); // scratch space + if( pTemps == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // We use temporaries to recalculate t, t0, t1 and the public key hash from the import values + // for A (derived from the public seed), s1 and s2. 
This is not strictly necessary for callers + // who just want to import a private key and use it for signing, but it allows the private + // key to also be used for verification, and more importantly, provides extra robustness by + // ensuring that the derived values are consistent with the encoded values. If the perf of + // importing a key becomes a concern, we can move this recalculation to the first time the key + // is used for verification. + PSYMCRYPT_MLDSA_VECTOR pvtTmp = pTemps->pvRowVectors[0]; + PSYMCRYPT_MLDSA_VECTOR pvt0Tmp = pTemps->pvRowVectors[1]; + PSYMCRYPT_MLDSA_POLYELEMENT peTmp = pTemps->pePolyElements[0]; + PBYTE pbEncodedPubKeyTmp = pTemps->pbScratch; + + // Reset the key state in case this key is being reused + pkMlDsakey->hasRootSeed = FALSE; + pkMlDsakey->hasPrivateKey = FALSE; + SymCryptWipeKnownSize( pkMlDsakey->rootSeed, SYMCRYPT_MLDSA_ROOT_SEED_SIZE ); + + memcpy( pkMlDsakey->publicSeed, pbCurr, SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE ); + pbCurr += SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE; + + memcpy( pkMlDsakey->privateSigningSeed, pbCurr, SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE ); + pbCurr += SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE; + + memcpy( pubKeyHashTmp, pbCurr, SYMCRYPT_MLDSA_PUBLIC_KEY_HASH_SIZE ); + pbCurr += SYMCRYPT_MLDSA_PUBLIC_KEY_HASH_SIZE; + + // Expand A matrix + SymCryptMlDsaExpandA( pkMlDsakey->publicSeed, SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE, pkMlDsakey->pmA ); + + scError = SymCryptMlDsaVectorDecode( + pbCurr, + pParams->encodedCoefficientBitLength, + pParams->privateKeyRange, + pkMlDsakey->pvs1 ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + pbCurr += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( pkMlDsakey->pvs1, pParams->encodedCoefficientBitLength ); + + + scError = SymCryptMlDsaVectorDecode( + pbCurr, + pParams->encodedCoefficientBitLength, + pParams->privateKeyRange, + pkMlDsakey->pvs2 ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + pbCurr += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( 
pkMlDsakey->pvs2, pParams->encodedCoefficientBitLength ); + + scError = SymCryptMlDsaVectorDecode( + pbCurr, + SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS, + 1 << (SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS - 1), + pvt0Tmp ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + pbCurr += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( pkMlDsakey->pvt0, SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS ); + + SYMCRYPT_ASSERT( pbCurr == pbSrc + cbSrc ); + + // Convert s1 and s2 to NTT form + SymCryptMlDsaVectorNTT( pkMlDsakey->pvs1 ); + SymCryptMlDsaVectorNTT( pkMlDsakey->pvs2 ); + + SymCryptMlDsakeyComputeT( + pkMlDsakey->pmA, + pkMlDsakey->pvs1, + pkMlDsakey->pvs2, + pkMlDsakey->pvt0, + pkMlDsakey->pvt1, + pvtTmp, + peTmp ); + + // If the recalculated t0 doesn't match, the imported key is invalid. + // Note: SymCryptMlDsakeyComputeT sets t0 and t1 in NTT form. + if( memcmp( pkMlDsakey->pvt0, pvt0Tmp, pvt0Tmp->cbTotalSize ) != 0 ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + // Convert t0 and t1 to NTT form + SymCryptMlDsaVectorNTT( pkMlDsakey->pvt0 ); + SymCryptMlDsaVectorNTT( pkMlDsakey->pvt1 ); + + scError = SymCryptMlDsaPkEncode( pkMlDsakey, pbEncodedPubKeyTmp, pParams->cbEncodedPublicKey ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Recalculate the public key hash and compare it to the imported value. If they don't match, + // the imported key is invalid. 
+ SymCryptShake256( + pbEncodedPubKeyTmp, + pParams->cbEncodedPublicKey, + pkMlDsakey->publicKeyHash, + SYMCRYPT_MLDSA_PUBLIC_KEY_HASH_SIZE ); + + if( memcmp( pkMlDsakey->publicKeyHash, pubKeyHashTmp, SYMCRYPT_MLDSA_PUBLIC_KEY_HASH_SIZE ) != 0 ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + pkMlDsakey->hasPrivateKey = TRUE; + +cleanup: + if( pTemps != NULL ) + { + SymCryptMlDsaTemporariesFree( pTemps ); + } + + // Wipe private state on error as defense-in-depth + if( scError != SYMCRYPT_NO_ERROR ) + { + SymCryptWipeKnownSize( pkMlDsakey->privateSigningSeed, SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE ); + SymCryptMlDsaVectorSetZero( pkMlDsakey->pvs1 ); + SymCryptMlDsaVectorSetZero( pkMlDsakey->pvs2 ); + SymCryptMlDsaVectorSetZero( pkMlDsakey->pvt0 ); + } + + return scError; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaSigEncode( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + PBYTE pbCommitmentHash, + SIZE_T cbCommitmentHash, + PCSYMCRYPT_MLDSA_VECTOR pvResponse, + PCSYMCRYPT_MLDSA_VECTOR pvHint, + PBYTE pbDst, + SIZE_T cbDst ) +{ + SYMCRYPT_ASSERT( cbDst == pParams->cbEncodedSignature ); + UNREFERENCED_PARAMETER( cbDst ); + + const SIZE_T cbEncodedHint = pParams->nHintNonZeroCoeffs + pvHint->nElems; + + PBYTE pbCurr = pbDst; + + memcpy( pbCurr, pbCommitmentHash, cbCommitmentHash ); + pbCurr += cbCommitmentHash; + + SymCryptMlDsaVectorEncode( + pvResponse, + pParams->maskCoefficientRangeLog2 + 1, + 1 << pParams->maskCoefficientRangeLog2, + pbCurr ); + + pbCurr += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( pvResponse, pParams->maskCoefficientRangeLog2 + 1 ); + + SymCryptMlDsaHintBitPack( pParams, pvHint, pbCurr ); + pbCurr += cbEncodedHint; + + SYMCRYPT_ASSERT( pbCurr == pbDst + cbDst ); +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSigDecode( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + PCBYTE pbSig, + SIZE_T cbSig, + PBYTE pbCommitmentHash, + SIZE_T cbCommitmentHash, + PSYMCRYPT_MLDSA_VECTOR 
pvResponse, + PSYMCRYPT_MLDSA_VECTOR pvHint ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + if( cbSig != pParams->cbEncodedSignature ) + { + scError = SYMCRYPT_WRONG_DATA_SIZE; + goto cleanup; + } + + SYMCRYPT_ASSERT( cbCommitmentHash == pParams->cbCommitmentHash ); + + PCBYTE pbCurr = pbSig; + + memcpy( pbCommitmentHash, pbSig, cbCommitmentHash ); + pbCurr += cbCommitmentHash; + + SymCryptMlDsaVectorDecode( + pbCurr, + pParams->maskCoefficientRangeLog2 + 1, + 1 << pParams->maskCoefficientRangeLog2, + pvResponse ); + pbCurr += SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( pvResponse, pParams->maskCoefficientRangeLog2 + 1 ); + + SYMCRYPT_ASSERT( cbSig - (pbCurr - pbSig) == (SIZE_T) pParams->nHintNonZeroCoeffs + pvHint->nElems ); + + scError = SymCryptMlDsaHintBitUnpack( pParams, pbCurr, pvHint ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + return scError; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaHintBitPack( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + PCSYMCRYPT_MLDSA_VECTOR pvSrc, + PBYTE pbDst ) +{ + SymCryptWipe( pbDst, pParams->nHintNonZeroCoeffs ); + + UINT32 index = 0; + for( UINT32 i = 0; i < pvSrc->nElems; ++i ) + { + PSYMCRYPT_MLDSA_POLYELEMENT peElement = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc ); + for( UINT32 j = 0; j < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; ++j ) + { + // Side channel safety: the hint is public (part of the signature) so it's okay to + // leak information here + if( peElement->coeffs[j] != 0 ) + { + // Each byte in the hint is the index of a non-zero coefficient + pbDst[index] = (BYTE) j; + index++; + } + } + + // The number of non-zero coefficients in polynomials 0..i is stored in the + // (nHintNonZeroCoeffs + i)th byte. This allows us to determine which indices correspond + // to which polynomials during decoding while still only using one byte per index. 
//
// SymCryptMlDsaHintBitUnpack
//
// Decodes a packed hint into pvDst, validating the encoding as it goes.
//
// Layout of pbSrc as read by this function:
//  - bytes [0, nHintNonZeroCoeffs): coefficient positions of the set hint
//    bits, grouped by polynomial;
//  - byte nHintNonZeroCoeffs + i: running total of positions used by
//    polynomials 0..i.
//
//  pParams     Parameter set (supplies nHintNonZeroCoeffs).
//  pbSrc       Packed hint, nHintNonZeroCoeffs + pvDst->nElems bytes are read.
//  pvDst       Receives the hint; coefficients are set to 0 or 1.
//
// Returns SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE when
//  - a running total decreases or exceeds nHintNonZeroCoeffs,
//  - positions within one polynomial are not strictly increasing, or
//  - any unused position byte is non-zero;
// otherwise SYMCRYPT_NO_ERROR.
//
// Note: on an early failure, polynomials after the failing one have not been
// zeroed or written.
//
_Use_decl_annotations_
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptMlDsaHintBitUnpack(
    PCSYMCRYPT_MLDSA_INTERNAL_PARAMS    pParams,
    PCBYTE                              pbSrc,
    PSYMCRYPT_MLDSA_VECTOR              pvDst )
{
    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
    UINT32 index = 0;       // next unread position byte
    UINT32 maxIndex = 0;    // end (exclusive) of the current polynomial's position bytes
    UINT32 first = 0;       // start of the current polynomial's position bytes

    for( UINT32 i = 0; i < pvDst->nElems; ++i )
    {
        PSYMCRYPT_MLDSA_POLYELEMENT peElement = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst );

        // Ensure pvDst is zeroed out before unpacking
        SymCryptMlDsaPolyElementSetZero( peElement );

        // Running totals must be non-decreasing and bounded by the number of position bytes
        maxIndex = pbSrc[pParams->nHintNonZeroCoeffs + i];
        if( ( maxIndex < index) ||
            ( maxIndex > pParams->nHintNonZeroCoeffs) )
        {
            // Invalid input
            scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE;
            goto cleanup;
        }

        first = index;
        while( index < maxIndex )
        {
            // Positions inside one polynomial must be strictly increasing, which also
            // rules out duplicates. The first position of a polynomial has no predecessor.
            if( index > first && pbSrc[index - 1] >= pbSrc[index])
            {
                // Invalid input
                scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE;
                goto cleanup;
            }

            peElement->coeffs[pbSrc[index]] = 1;
            index++;
        }
    }

    // Position bytes that no polynomial claimed must be zero padding
    for(UINT32 leftover = index; leftover < pParams->nHintNonZeroCoeffs; ++leftover)
    {
        if( pbSrc[leftover] != 0 )
        {
            // Invalid input
            scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE;
            goto cleanup;
        }
    }

cleanup:
    return scError;
}
cbHashExpected = g_hashOidMap[i].pHashAlgorithm->resultSize; + break; + } + } + + if( !fFound ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SYMCRYPT_ASSERT( pHashOid->cbOID == SYMCRYPT_MLDSA_SUPPORTED_HASH_OID_SIZE ); + + // For traditional hash algorithms (non-XOFs), the hash length must exactly match the expected + // value. For XOFs, the output length is arbitrary, and any length is acceptable as long as it + // meets the minimum collision strength specified by the parameter set (cbCommitmentHash) + if( (!fIsXof && cbHash != cbHashExpected ) || + ( cbHash < pParams->cbCommitmentHash ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + *ppOid = pHashOid; + +cleanup: + return scError; +} + +FORCEINLINE +INT32 +SYMCRYPT_CALL +SymCryptMlDsaModPlusMinus( UINT32 r, UINT32 modulus ) +{ + SYMCRYPT_ASSERT( r < modulus ); + + // In most cases this function is used with even moduli, e.g. with Power2Round. + // However, it's okay if the modulus is odd. FIPS 204 specifies that the output is in the range + // (-ceil(modulus/2), floor(modulus/2) ] + // = ( -((modulus + 1) // 2), modulus // 2 ] + // = [ -modulus // 2, modulus // 2 ] + const INT32 halfModulus = modulus >> 1; + + // Mask for conditional subtraction: 0 if r <= (modulus/2), 0xFFFFFFFF otherwise + UINT32 subtractionMask = SYMCRYPT_MASK32_LT( halfModulus, r ); + + INT32 r0 = (INT32) r - (modulus & subtractionMask); + SYMCRYPT_ASSERT( r0 > -halfModulus && r0 <= halfModulus); + + return r0; +} + +_Use_decl_annotations_ +FORCEINLINE +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaPolyElementInfinityNorm( PCSYMCRYPT_MLDSA_POLYELEMENT peSrc ) +{ + UINT32 norm = 0; + UINT32 mask; + INT32 curr; + for( UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++ ) + { + // Convert the coefficient to signed form + curr = SymCryptMlDsaModPlusMinus( peSrc->coeffs[i], SYMCRYPT_MLDSA_Q ); + + // If the coefficient is less than 0, negate it + mask = SYMCRYPT_MASK32_LT( curr, 0 ); + curr = (curr & 
~mask) | ((curr * -1) & mask); + + // If the coefficient is greater than the current norm, update the norm + mask = SYMCRYPT_MASK32_LT( norm, curr ); + norm = (norm & ~mask) | (curr & mask); + } + + return norm; +} + +_Use_decl_annotations_ +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaVectorInfinityNorm( PCSYMCRYPT_MLDSA_VECTOR pvSrc ) +{ + UINT32 norm = 0; + UINT32 curr; + INT32 mask; + for( UINT32 i = 0; i < pvSrc->nElems; i++ ) + { + curr = SymCryptMlDsaPolyElementInfinityNorm( SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT(i, pvSrc) ); + mask = SYMCRYPT_MASK32_LT( norm, curr ); + norm = (norm & ~mask) | (curr & mask); + } + + return norm; +} + +_Use_decl_annotations_ +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptMlDsaDecompose( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + UINT32 r, + UINT32 *puR1, + UINT32 *puR0 ) +{ + SYMCRYPT_ASSERT( r < SYMCRYPT_MLDSA_Q ); + SYMCRYPT_ASSERT( puR1 != NULL || puR0 != NULL ); + + UINT32 r1 = 0; + UINT32 mask = 0; + INT32 r0 = 0; + + // Some tricks for calculating this are borrowed from the reference implementation + // https://github.com/pq-crystals/dilithium/blob/master/ref/rounding.c + // + // The multiplication constants for calculating r1 are in the PCSYMCRYPT_MLDSA_INTERNAL_PARAMS + // structure. They are calculated as follows. + // + // To keep intermediate values in the 32-bit range, instead of using r directly, we calculate + // ceil( r/128 ). We likewise divide the commitment rounding range by 128. 
+ // + // For ML-DSA 44: + // 2*commitmentRoundingRange = 2 * 95,232 = 190,464 + // 190,464 / 128 = 1488 + // 1 / 1488 ~= floor(2^24 / 1488) / 2^24 = 11,275 / 2^24 + // For ML-DSA 65 and 87: + // 2*commitmentRoundingRange = 2 * 261,888 = 523,776 + // 523,776 / 128 = 4092 + // 1 / 4092 ~= floor(2^22 / 4092) / 2^22 = 1025 / 2^22 = 4100 / 2^24 + // + + UINT32 rdiv128 = (r + 127) >> 7; + r1 = ( rdiv128 * pParams->decomposeR1Factor + (1 << 23)) >> 24; + + // Handle corner case: if r1 is outside of the expected range, set it to 0 + mask = SYMCRYPT_MASK32_LT( r1, pParams->commitmentModulus ); + r1 &= mask; + + r0 = r - ( r1 * 2 * pParams->commitmentRoundingRange ); + + // Handle corner case for r0 + r0 -= ((((SYMCRYPT_MLDSA_Q - 1) >> 1) - r0) >> 31) & SYMCRYPT_MLDSA_Q; + r0 = SymCryptMlDsaSignedCoefficientModQ( r0 ); + + if( puR1 != NULL ) + { + *puR1 = r1; + } + + if( puR0 != NULL ) + { + *puR0 = r0; + } +} + +// For every coefficient of every element of pvSrc, stores the r1 output of +// SymCryptMlDsaDecompose in the matching coefficient of pvDst (r0 is not requested). +// pvSrc and pvDst must have the same number of elements. +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorHighBits( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + PCSYMCRYPT_MLDSA_VECTOR pvSrc, + PSYMCRYPT_MLDSA_VECTOR pvDst ) +{ + SYMCRYPT_ASSERT( pvSrc->nElems == pvDst->nElems ); + + for(UINT32 i = 0; i < pvSrc->nElems; i++) + { + for(UINT32 j = 0; j < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; j++) + { + SymCryptMlDsaDecompose( + pParams, + SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc )->coeffs[j], + &(SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst )->coeffs[j]), + NULL ); + } + } +} + +// For every coefficient of every element of pvSrc, stores the r0 output of +// SymCryptMlDsaDecompose in the matching coefficient of pvDst (r1 is not requested). +// pvSrc and pvDst must have the same number of elements. +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorLowBits( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + PCSYMCRYPT_MLDSA_VECTOR pvSrc, + PSYMCRYPT_MLDSA_VECTOR pvDst ) +{ + SYMCRYPT_ASSERT( pvSrc->nElems == pvDst->nElems ); + + for(UINT32 i = 0; i < pvSrc->nElems; i++) + { + for(UINT32 j = 0; j < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; j++) + { + SymCryptMlDsaDecompose( + pParams, + SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc )->coeffs[j], + NULL, + &(SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, 
pvDst )->coeffs[j]) ); + } + } +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPower2Round( + UINT32 r, + UINT32* puR1, + UINT32* puR0 ) +{ + SYMCRYPT_ASSERT( r < SYMCRYPT_MLDSA_Q ); + + UINT32 rPrime = r & ( (1 << SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS) - 1 ); // r mod 2^d + INT32 r0 = SymCryptMlDsaModPlusMinus( rPrime, 1 << SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS ); + + UINT32 r1 = (r - r0) >> SYMCRYPT_POWER2ROUND_LOW_ORDER_BITS; + + *puR1 = r1; + *puR0 = SymCryptMlDsaSignedCoefficientModQ( r0 ); // r0 may be negative; store its representative in [0, Q) +} + +// Applies SymCryptMlDsaPower2Round to each coefficient of peSrc, writing the r1 outputs to peDst1 +// and the r0 outputs to peDst0. +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementPower2Round( + PCSYMCRYPT_MLDSA_POLYELEMENT peSrc, + PSYMCRYPT_MLDSA_POLYELEMENT peDst1, + PSYMCRYPT_MLDSA_POLYELEMENT peDst0 ) +{ + UINT32 r1, r0; + + for(UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++) + { + SymCryptMlDsaPower2Round( peSrc->coeffs[i], &r1, &r0 ); + peDst1->coeffs[i] = r1; + peDst0->coeffs[i] = r0; + } +} + +// Applies SymCryptMlDsaPolyElementPower2Round to each element of pvSrc. +// All three vectors must have the same number of elements. +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorPower2Round( + PCSYMCRYPT_MLDSA_VECTOR pvSrc, + PSYMCRYPT_MLDSA_VECTOR pvDst1, + PSYMCRYPT_MLDSA_VECTOR pvDst0 ) +{ + SYMCRYPT_ASSERT( pvSrc->nElems == pvDst1->nElems ); + SYMCRYPT_ASSERT( pvSrc->nElems == pvDst0->nElems ); + + for(UINT32 i = 0; i < pvSrc->nElems; i++) + { + SymCryptMlDsaPolyElementPower2Round( + SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc ), + SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst1 ), + SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst0 ) ); + } +} + +// Maps a signed coefficient in ( -Q, Q ) to its representative in [0, Q) by adding Q when the +// input is negative; the addition is selected with a mask rather than with a branch. +FORCEINLINE +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaSignedCoefficientModQ( INT32 coefficient ) +{ + SYMCRYPT_ASSERT(coefficient > -1 * SYMCRYPT_MLDSA_Q && coefficient < SYMCRYPT_MLDSA_Q); + + UINT32 result; + UINT32 negativeMask = SYMCRYPT_MASK32_LT( coefficient, 0 ); + + result = coefficient + (SYMCRYPT_MLDSA_Q & negativeMask); + SYMCRYPT_ASSERT( result < SYMCRYPT_MLDSA_Q ); + + return result; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementEncode( + 
PCSYMCRYPT_MLDSA_POLYELEMENT peSrc, + UINT32 nBitsPerCoefficient, + UINT32 signedCoefficientBound, + PBYTE pbDst ) +{ + SYMCRYPT_ASSERT( nBitsPerCoefficient > 0 ); + SYMCRYPT_ASSERT( nBitsPerCoefficient <= 20 ); // Maximum number of bits per coefficient across all encodings + + INT32 coefficient; + UINT32 nBitsInCoefficient; + UINT32 bitsToEncode; + UINT32 nBitsToEncode; + UINT32 cbDstWritten = 0; + UINT32 accumulator = 0; + UINT32 nBitsInAccumulator = 0; + + for(UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++ ) + { + nBitsInCoefficient = nBitsPerCoefficient; + coefficient = peSrc->coeffs[i]; + + SYMCRYPT_ASSERT( coefficient < SYMCRYPT_MLDSA_Q ); + + // If the coefficient is greater than the signedCoefficientBound, that means it is + // a negative value modulo Q, so we need to subtract Q to get the original value. + coefficient -= ( SYMCRYPT_MLDSA_Q & SYMCRYPT_MASK32_LT( signedCoefficientBound, coefficient ) ); + + // The coefficient is now in the range [-signedCoefficientBound, signedCoefficientBound], + // we need to map it to the range [0, 2*signedCoefficientBound] for encoding. + coefficient = SYMCRYPT_INTERNAL_MLDSA_SHORT_COEFFICIENT_ENCODE_DECODE( coefficient, signedCoefficientBound ); + + // Some coefficients are always positive and so do not need any special encoding. In this + // case, we revert to the original value from the source polynomial. 
+ coefficient = ( peSrc->coeffs[i] & SYMCRYPT_MASK32_ZERO( signedCoefficientBound ) ) | + ( coefficient & SYMCRYPT_MASK32_NONZERO( signedCoefficientBound ) ); + + SYMCRYPT_ASSERT( coefficient >= 0 ); + SYMCRYPT_ASSERT( signedCoefficientBound == 0 || (UINT32) coefficient <= signedCoefficientBound * 2 ); + SYMCRYPT_ASSERT( (UINT32) coefficient < (1ul << nBitsPerCoefficient) ); + + // encode the coefficient + // simple loop to add bits to accumulator and write accumulator to output + do + { + nBitsToEncode = SYMCRYPT_MIN( nBitsInCoefficient, 32 - nBitsInAccumulator ); + + bitsToEncode = coefficient & ( ( 1UL << nBitsToEncode ) - 1 ); + coefficient >>= nBitsToEncode; + nBitsInCoefficient -= nBitsToEncode; + + accumulator |= ( bitsToEncode << nBitsInAccumulator ); + nBitsInAccumulator += nBitsToEncode; + if(nBitsInAccumulator == 32) + { + SYMCRYPT_STORE_LSBFIRST32( pbDst + cbDstWritten, accumulator ); + cbDstWritten += 4; + accumulator = 0; + nBitsInAccumulator = 0; + } + } while( nBitsInCoefficient > 0 ); + } + + SYMCRYPT_ASSERT(nBitsInAccumulator == 0); + SYMCRYPT_ASSERT(cbDstWritten == (nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8))); +} + +_Use_decl_annotations_ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaPolyElementDecode( + PCBYTE pbSrc, + UINT32 nBitsPerCoefficient, + UINT32 signedCoefficientBound, + PSYMCRYPT_MLDSA_POLYELEMENT peDst ) +{ + SYMCRYPT_ASSERT( nBitsPerCoefficient > 0 ); + SYMCRYPT_ASSERT( nBitsPerCoefficient <= 20 ); // Maximum number of bits per coefficient across all encodings + + INT32 coefficient; + UINT32 nBitsInCoefficient; + UINT32 bitsToDecode; + UINT32 nBitsToDecode; + UINT32 cbSrcRead = 0; + UINT32 accumulator = 0; + UINT32 nBitsInAccumulator = 0; + + for(UINT32 i = 0; i < SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++ ) + { + coefficient = 0; + nBitsInCoefficient = 0; + + // first gather and decode bits from pbSrc + do + { + if(nBitsInAccumulator == 0) + { + accumulator = SYMCRYPT_LOAD_LSBFIRST32( pbSrc+cbSrcRead ); + 
cbSrcRead += 4; + nBitsInAccumulator = 32; + } + + nBitsToDecode = SYMCRYPT_MIN(nBitsPerCoefficient-nBitsInCoefficient, nBitsInAccumulator); + SYMCRYPT_ASSERT(nBitsToDecode <= nBitsInAccumulator); + + bitsToDecode = accumulator & ((1UL<<nBitsToDecode)-1); + accumulator >>= nBitsToDecode; + nBitsInAccumulator -= nBitsToDecode; + + coefficient |= (bitsToDecode << nBitsInCoefficient); + nBitsInCoefficient += nBitsToDecode; + } while( nBitsPerCoefficient > nBitsInCoefficient ); + SYMCRYPT_ASSERT( nBitsInCoefficient == nBitsPerCoefficient ); + + // Coefficient should always be positive at this point since it's encoded in <= 20 bits + SYMCRYPT_ASSERT( coefficient >= 0 ); + + if( ( signedCoefficientBound != 0 ) && ( (UINT32) coefficient > 2 * signedCoefficientBound ) ) + { + // Most of the encoded components of ML-DSA keys and signatures cannot be outside the + // valid range, because the number of bits in their encodings do not permit invalid + // values. However, the private key components s1 and s2 have encodings that do allow + // for invalid values (because their valid ranges are [-2, 2] or [-4, 4]). If any + // coefficient is outside this range, the key is invalid and we return an error. + // We do not need to do this check in constant time because we treat the validity of an + // imported key as public information. + return SYMCRYPT_INVALID_BLOB; + } + + // If this coefficient is intended to be signed, we need to decode it into its original + // signed form and then map it modulo Q. 
+ // Side-channel safety: signedCoefficientBound just indicates which component we are + // decoding, so it's public information + if( signedCoefficientBound != 0 ) + { + coefficient = SYMCRYPT_INTERNAL_MLDSA_SHORT_COEFFICIENT_ENCODE_DECODE( coefficient, signedCoefficientBound ); + coefficient = SymCryptMlDsaSignedCoefficientModQ( coefficient ); + } + + peDst->coeffs[i] = coefficient; + } + + SYMCRYPT_ASSERT(nBitsInAccumulator == 0); + SYMCRYPT_ASSERT(cbSrcRead == (nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8))); + + return SYMCRYPT_NO_ERROR; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorEncode( + PCSYMCRYPT_MLDSA_VECTOR pvSrc, + UINT32 nBitsPerCoefficient, + UINT32 signedCoefficientBound, + PBYTE pbDst ) +{ + PBYTE pbCurr = pbDst; + const SIZE_T cbEncodedPoly = nBitsPerCoefficient * ( SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8 ); + + for( UINT32 i = 0; i < pvSrc->nElems; ++i ) + { + PCSYMCRYPT_MLDSA_POLYELEMENT peElement = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvSrc ); + + SymCryptMlDsaPolyElementEncode( + peElement, + nBitsPerCoefficient, + signedCoefficientBound, + pbCurr ); + + pbCurr += cbEncodedPoly; + } + + SYMCRYPT_ASSERT( pbCurr == pbDst + ( pvSrc->nElems * cbEncodedPoly ) ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaVectorDecode( + _In_reads_bytes_( pvDst->nElems * nBitsPerCoefficient * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8) ) + PCBYTE pbSrc, + UINT32 nBitsPerCoefficient, + UINT32 signedCoefficientBound, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PCBYTE pbCurr = pbSrc; + const SIZE_T cbEncodedPoly = nBitsPerCoefficient * ( SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8 ); + + for( UINT32 i = 0; i < pvDst->nElems; ++i ) + { + PSYMCRYPT_MLDSA_POLYELEMENT peElement = SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( i, pvDst ); + + scError = SymCryptMlDsaPolyElementDecode( + pbCurr, + nBitsPerCoefficient, + signedCoefficientBound, + peElement ); + if( scError != 
SYMCRYPT_NO_ERROR ) + { + // Side-channel safety: the validity of an imported key is public information. + // See comment in SymCryptMlDsaPolyElementDecode. + goto cleanup; + } + + pbCurr += cbEncodedPoly; + } + + SYMCRYPT_ASSERT( pbCurr == pbSrc + ( pvDst->nElems * cbEncodedPoly ) ); + +cleanup: + return scError; +} + +_Use_decl_annotations_ +PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES +SYMCRYPT_CALL +SymCryptMlDsaTemporariesAllocateAndInitialize( + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + UINT32 nRowVectors, + UINT32 nColVectors, + UINT32 nPolyElements, + UINT32 cbScratch ) +{ + // Round scratch space to nearest multiple of 8 for alignment + cbScratch = (cbScratch + 7) & ~7; + + UINT32 cbTotalSize = sizeof( SYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES) + + ( nRowVectors * sizeof( PSYMCRYPT_MLDSA_VECTOR ) ) + // Row vector pointers + ( nColVectors * sizeof( PSYMCRYPT_MLDSA_VECTOR ) ) + // Col vector pointers + ( nPolyElements * sizeof( PSYMCRYPT_MLDSA_POLYELEMENT ) ) + // Poly element pointers + ( nRowVectors * pParams->cbRowVector ) + // Row vector buffer + ( nColVectors * pParams->cbColVector ) + // Col vector buffer + ( nPolyElements * pParams->cbPolyElement ) + // Poly element buffer + ( cbScratch ); // Scratch buffer + + PBYTE pbBuffer = SymCryptCallbackAlloc( cbTotalSize ); + if( pbBuffer == NULL ) + { + return NULL; + } + + SymCryptWipe( pbBuffer, cbTotalSize ); + + PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemporaries = + (PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES) pbBuffer; + + pTemporaries->cbTotalSize = cbTotalSize; + pTemporaries->nRowVectors = nRowVectors; + pTemporaries->nColVectors = nColVectors; + pTemporaries->nPolyElements = nPolyElements; + pTemporaries->cbScratch = cbScratch; + + PBYTE pbCurrent = pbBuffer + sizeof( SYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES ); + + if( nRowVectors > 0 ) + { + pTemporaries->pvRowVectors = (PSYMCRYPT_MLDSA_VECTOR *) pbCurrent; + pbCurrent += nRowVectors * sizeof( 
PSYMCRYPT_MLDSA_VECTOR ); + } + + if( nColVectors > 0 ) + { + pTemporaries->pvColVectors = (PSYMCRYPT_MLDSA_VECTOR *) pbCurrent; + pbCurrent += nColVectors * sizeof( PSYMCRYPT_MLDSA_VECTOR ); + } + + if( nPolyElements > 0 ) + { + pTemporaries->pePolyElements = (PSYMCRYPT_MLDSA_POLYELEMENT *) pbCurrent; + pbCurrent += nPolyElements * sizeof( PSYMCRYPT_MLDSA_POLYELEMENT ); + } + + for(UINT32 i = 0; i < nRowVectors; i++) + { + pTemporaries->pvRowVectors[i] = SymCryptMlDsaVectorCreate( pbCurrent, pParams->cbRowVector, pParams->nRows ); + pbCurrent += pParams->cbRowVector; + } + + for(UINT32 i = 0; i < nColVectors; i++) + { + pTemporaries->pvColVectors[i] = SymCryptMlDsaVectorCreate( pbCurrent, pParams->cbColVector, pParams->nCols ); + pbCurrent += pParams->cbColVector; + } + + for(UINT32 i = 0; i < nPolyElements; i++) + { + pTemporaries->pePolyElements[i] = SymCryptMlDsaPolyElementCreate( pbCurrent, pParams->cbPolyElement ); + pbCurrent += pParams->cbPolyElement; + } + + if( cbScratch > 0 ) + { + pTemporaries->pbScratch = pbCurrent; + pbCurrent += cbScratch; + } + + SYMCRYPT_ASSERT( pbCurrent == pbBuffer + cbTotalSize ); + + SYMCRYPT_SET_MAGIC( pTemporaries ); + + return pTemporaries; +} + +_Use_decl_annotations_ +VOID +SYMCRYPT_CALL +SymCryptMlDsaTemporariesFree( + PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemporaries ) +{ + SYMCRYPT_CHECK_MAGIC( pTemporaries ); + + SymCryptWipe( pTemporaries, pTemporaries->cbTotalSize ); + SymCryptCallbackFree( pTemporaries ); +} diff --git a/libs/symcrypt/lib/mlkem.c b/libs/symcrypt/lib/mlkem.c new file mode 100644 index 00000000000..975775587ce --- /dev/null +++ b/libs/symcrypt/lib/mlkem.c @@ -0,0 +1,1164 @@ +// +// mlkem.c ML-KEM related functionality +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#include "precomp.h" + +#define NROWS_MLKEM512 (2) +#define NROWS_MLKEM768 (3) +#define NROWS_MLKEM1024 (4) + +const SYMCRYPT_MLKEM_INTERNAL_PARAMS SymCryptMlKemInternalParamsMlKem512 = +{ + .params = SYMCRYPT_MLKEM_PARAMS_MLKEM512, + .cbPolyElement = SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT, + .nRows = NROWS_MLKEM512, + .cbVector = sizeof(SYMCRYPT_MLKEM_VECTOR) + (NROWS_MLKEM512 * SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT), + .cbMatrix = sizeof(SYMCRYPT_MLKEM_MATRIX) + (NROWS_MLKEM512 * + NROWS_MLKEM512 * + SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT), + .nEta1 = 3, + .nEta2 = 2, + .nBitsOfU = 10, + .nBitsOfV = 4, +}; + +const SYMCRYPT_MLKEM_INTERNAL_PARAMS SymCryptMlKemInternalParamsMlKem768 = +{ + .params = SYMCRYPT_MLKEM_PARAMS_MLKEM768, + .cbPolyElement = SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT, + .nRows = NROWS_MLKEM768, + .cbVector = sizeof(SYMCRYPT_MLKEM_VECTOR) + (NROWS_MLKEM768 * SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT), + .cbMatrix = sizeof(SYMCRYPT_MLKEM_MATRIX) + (NROWS_MLKEM768 * + NROWS_MLKEM768 * + SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT), + .nEta1 = 2, + .nEta2 = 2, + .nBitsOfU = 10, + .nBitsOfV = 4, +}; + +const SYMCRYPT_MLKEM_INTERNAL_PARAMS SymCryptMlKemInternalParamsMlKem1024 = +{ + .params = SYMCRYPT_MLKEM_PARAMS_MLKEM1024, + .cbPolyElement = SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT, + .nRows = NROWS_MLKEM1024, + .cbVector = sizeof(SYMCRYPT_MLKEM_VECTOR) + (NROWS_MLKEM1024 * SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT), + .cbMatrix = sizeof(SYMCRYPT_MLKEM_MATRIX) + (NROWS_MLKEM1024 * + NROWS_MLKEM1024 * + SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT), + .nEta1 = 2, + .nEta2 = 2, + .nBitsOfU = 11, + .nBitsOfV = 5, +}; + +static +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemkeyGetInternalParamsFromParams( + SYMCRYPT_MLKEM_PARAMS params, + _Out_ PSYMCRYPT_MLKEM_INTERNAL_PARAMS pInternalParams ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + switch( params ) + { + case 
SYMCRYPT_MLKEM_PARAMS_MLKEM512: + *pInternalParams = SymCryptMlKemInternalParamsMlKem512; + break; + case SYMCRYPT_MLKEM_PARAMS_MLKEM768: + *pInternalParams = SymCryptMlKemInternalParamsMlKem768; + break; + case SYMCRYPT_MLKEM_PARAMS_MLKEM1024: + *pInternalParams = SymCryptMlKemInternalParamsMlKem1024; + break; + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + +cleanup: + return scError; +} + +static +PSYMCRYPT_MLKEMKEY +SYMCRYPT_CALL +SymCryptMlKemkeyInitialize( + _In_ PCSYMCRYPT_MLKEM_INTERNAL_PARAMS pInternalParams, + _Out_writes_bytes_(cbKey) PBYTE pbKey, + UINT32 cbKey ) +{ + PSYMCRYPT_MLKEMKEY pRes = NULL; + PSYMCRYPT_MLKEMKEY pKey = (PSYMCRYPT_MLKEMKEY)pbKey; + PBYTE pbCurr = pbKey + sizeof(SYMCRYPT_MLKEMKEY); + + SymCryptWipeKnownSize( pbKey, cbKey ); + + pKey->fAlgorithmInfo = 0; + pKey->params = *pInternalParams; + pKey->cbTotalSize = cbKey; + pKey->hasPrivateSeed = FALSE; + pKey->hasPrivateKey = FALSE; + + pKey->pmAtranspose = SymCryptMlKemMatrixCreate( pbCurr, pInternalParams->cbMatrix, pInternalParams->nRows ); + if( pKey->pmAtranspose == NULL ) + { + goto cleanup; + } + pbCurr += pInternalParams->cbMatrix; + + pKey->pvt = SymCryptMlKemVectorCreate( pbCurr, pInternalParams->cbVector, pInternalParams->nRows ); + if( pKey->pvt == NULL ) + { + goto cleanup; + } + pbCurr += pInternalParams->cbVector; + + pKey->pvs = SymCryptMlKemVectorCreate( pbCurr, pInternalParams->cbVector, pInternalParams->nRows ); + if( pKey->pvs == NULL ) + { + goto cleanup; + } + pbCurr += pInternalParams->cbVector; + + SYMCRYPT_ASSERT( pbCurr == (pbKey + cbKey) ); + + SYMCRYPT_SET_MAGIC( pKey ); + + pRes = pKey; + +cleanup: + return pRes; +} + +PSYMCRYPT_MLKEMKEY +SYMCRYPT_CALL +SymCryptMlKemkeyAllocate( + SYMCRYPT_MLKEM_PARAMS params ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbKey = NULL; + UINT32 cbKey; + SYMCRYPT_MLKEM_INTERNAL_PARAMS internalParams; + + PSYMCRYPT_MLKEMKEY pKey = NULL; + + scError = 
SymCryptMlKemkeyGetInternalParamsFromParams(params, &internalParams); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + cbKey = sizeof(SYMCRYPT_MLKEMKEY) + internalParams.cbMatrix + (2*internalParams.cbVector); + + pbKey = SymCryptCallbackAlloc( cbKey ); + if ( pbKey == NULL ) + { + goto cleanup; + } + + pKey = SymCryptMlKemkeyInitialize( &internalParams, pbKey, cbKey ); + if ( pKey == NULL ) + { + goto cleanup; + } + + pbKey = NULL; + +cleanup: + if ( pbKey != NULL ) + { + SymCryptCallbackFree( pbKey ); + } + + return pKey; +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemkeyFree( + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey ) +{ + SYMCRYPT_CHECK_MAGIC( pkMlKemkey ); + + SymCryptWipe( (PBYTE) pkMlKemkey, pkMlKemkey->cbTotalSize ); + + SymCryptCallbackFree( pkMlKemkey ); +} + +#define SYMCRYPT_MLKEM_SIZEOF_ENCODED_UNCOMPRESSED_VECTOR(_nRows) (384UL * _nRows) + +// s and t are encoded uncompressed vectors +// public seed, H(encapsulation key) and z are each 32 bytes +#define SYMCRYPT_MLKEM_SIZEOF_FORMAT_DECAPSULATION_KEY(_nRows) ((2*SYMCRYPT_MLKEM_SIZEOF_ENCODED_UNCOMPRESSED_VECTOR(_nRows)) + (3*32)) +// t is encoded uncompressed vector +// public seed is 32 bytes +#define SYMCRYPT_MLKEM_SIZEOF_FORMAT_ENCAPSULATION_KEY(_nRows) (SYMCRYPT_MLKEM_SIZEOF_ENCODED_UNCOMPRESSED_VECTOR(_nRows) + 32) + +C_ASSERT( SYMCRYPT_MLKEM_SIZEOF_FORMAT_DECAPSULATION_KEY(NROWS_MLKEM512) == SYMCRYPT_MLKEM_DECAPSULATION_KEY_SIZE_MLKEM512 ); +C_ASSERT( SYMCRYPT_MLKEM_SIZEOF_FORMAT_DECAPSULATION_KEY(NROWS_MLKEM768) == SYMCRYPT_MLKEM_DECAPSULATION_KEY_SIZE_MLKEM768 ); +C_ASSERT( SYMCRYPT_MLKEM_SIZEOF_FORMAT_DECAPSULATION_KEY(NROWS_MLKEM1024) == SYMCRYPT_MLKEM_DECAPSULATION_KEY_SIZE_MLKEM1024 ); + +C_ASSERT( SYMCRYPT_MLKEM_SIZEOF_FORMAT_ENCAPSULATION_KEY(NROWS_MLKEM512) == SYMCRYPT_MLKEM_ENCAPSULATION_KEY_SIZE_MLKEM512 ); +C_ASSERT( SYMCRYPT_MLKEM_SIZEOF_FORMAT_ENCAPSULATION_KEY(NROWS_MLKEM768) == SYMCRYPT_MLKEM_ENCAPSULATION_KEY_SIZE_MLKEM768 ); +C_ASSERT( 
SYMCRYPT_MLKEM_SIZEOF_FORMAT_ENCAPSULATION_KEY(NROWS_MLKEM1024) == SYMCRYPT_MLKEM_ENCAPSULATION_KEY_SIZE_MLKEM1024 ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemSizeofKeyFormatFromParams( + SYMCRYPT_MLKEM_PARAMS params, + SYMCRYPT_MLKEMKEY_FORMAT mlKemkeyFormat, + _Out_ SIZE_T* pcbKeyFormat ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_MLKEM_INTERNAL_PARAMS internalParams; + + if( mlKemkeyFormat == SYMCRYPT_MLKEMKEY_FORMAT_NULL ) + { + scError = SYMCRYPT_INCOMPATIBLE_FORMAT; + goto cleanup; + } + + scError = SymCryptMlKemkeyGetInternalParamsFromParams(params, &internalParams); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + switch( mlKemkeyFormat ) + { + case SYMCRYPT_MLKEMKEY_FORMAT_PRIVATE_SEED: + *pcbKeyFormat = SYMCRYPT_MLKEM_PRIVATE_SEED_SIZE; + break; + + case SYMCRYPT_MLKEMKEY_FORMAT_DECAPSULATION_KEY: + *pcbKeyFormat = SYMCRYPT_MLKEM_SIZEOF_FORMAT_DECAPSULATION_KEY(internalParams.nRows); + break; + + case SYMCRYPT_MLKEMKEY_FORMAT_ENCAPSULATION_KEY: + *pcbKeyFormat = SYMCRYPT_MLKEM_SIZEOF_FORMAT_ENCAPSULATION_KEY(internalParams.nRows); + break; + + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemSizeofCiphertextFromParams( + SYMCRYPT_MLKEM_PARAMS params, + _Out_ SIZE_T* pcbCiphertext ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_MLKEM_INTERNAL_PARAMS internalParams; + SIZE_T cbU, cbV; + + scError = SymCryptMlKemkeyGetInternalParamsFromParams(params, &internalParams); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // u vector encoded with nBitsOfU * SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS bits per polynomial + cbU = ((SIZE_T)internalParams.nRows) * internalParams.nBitsOfU * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + // v polynomial encoded with nBitsOfV * SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS bits + cbV = ((SIZE_T)internalParams.nBitsOfV) * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + 
*pcbCiphertext = cbU + cbV; + + SYMCRYPT_ASSERT( (internalParams.params != SYMCRYPT_MLKEM_PARAMS_MLKEM512) || ((cbU + cbV) == SYMCRYPT_MLKEM_CIPHERTEXT_SIZE_MLKEM512) ); + SYMCRYPT_ASSERT( (internalParams.params != SYMCRYPT_MLKEM_PARAMS_MLKEM768) || ((cbU + cbV) == SYMCRYPT_MLKEM_CIPHERTEXT_SIZE_MLKEM768) ); + SYMCRYPT_ASSERT( (internalParams.params != SYMCRYPT_MLKEM_PARAMS_MLKEM1024) || ((cbU + cbV) == SYMCRYPT_MLKEM_CIPHERTEXT_SIZE_MLKEM1024) ); + +cleanup: + return scError; +} + +static +VOID +SYMCRYPT_CALL +SymCryptMlKemkeyExpandPublicMatrixFromPublicSeed( + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey, + _Inout_ PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES pCompTemps ) +{ + UINT32 i, j; + BYTE coordinates[2]; + + PSYMCRYPT_SHAKE128_STATE pShakeStateBase = &pCompTemps->hashState0.shake128State; + PSYMCRYPT_SHAKE128_STATE pShakeStateWork = &pCompTemps->hashState1.shake128State; + const UINT32 nRows = pkMlKemkey->params.nRows; + + SymCryptShake128Init( pShakeStateBase ); + SymCryptShake128Append( pShakeStateBase, pkMlKemkey->publicSeed, sizeof(pkMlKemkey->publicSeed) ); + + for( i=0; i<nRows; i++ ) + { + coordinates[1] = (BYTE)i; + for( j=0; j<nRows; j++ ) + { + coordinates[0] = (BYTE)j; + SymCryptShake128StateCopy( pShakeStateBase, pShakeStateWork ); + SymCryptShake128Append( pShakeStateWork, coordinates, sizeof(coordinates) ); + + SymCryptMlKemPolyElementSampleNTTFromShake128( pShakeStateWork, pkMlKemkey->pmAtranspose->apPolyElements[(i*nRows)+j] ); + } + } + + // no need to wipe; everything computed here is always public +} + +static +VOID +SYMCRYPT_CALL +SymCryptMlKemkeyComputeEncapsulationKeyHash( + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey, + _Inout_ PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES pCompTemps, + SIZE_T cbEncodedVector ) +{ + PSYMCRYPT_SHA3_256_STATE pState = &pCompTemps->hashState0.sha3_256State; + + SymCryptSha3_256Init( pState ); + SymCryptSha3_256Append( pState, pkMlKemkey->encodedT, cbEncodedVector ); + SymCryptSha3_256Append( pState, 
pkMlKemkey->publicSeed, sizeof(pkMlKemkey->publicSeed) ); + SymCryptSha3_256Result( pState, pkMlKemkey->encapsKeyHash ); +} + +static +VOID +SYMCRYPT_CALL +SymCryptMlKemkeyExpandFromPrivateSeed( + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey, + _Inout_ PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES pCompTemps ) +{ + BYTE privateSeedHash[SYMCRYPT_SHA3_512_RESULT_SIZE]; + BYTE CBDSampleBuffer[3*64 + 1]; + PSYMCRYPT_MLKEM_VECTOR pvTmp; + PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paTmp; + PSYMCRYPT_SHAKE256_STATE pShakeStateBase = &pCompTemps->hashState0.shake256State; + PSYMCRYPT_SHAKE256_STATE pShakeStateWork = &pCompTemps->hashState1.shake256State; + UINT32 i; + const UINT32 nRows = pkMlKemkey->params.nRows; + const UINT32 nEta1 = pkMlKemkey->params.nEta1; + const SIZE_T cbEncodedVector = SYMCRYPT_MLKEM_SIZEOF_ENCODED_UNCOMPRESSED_VECTOR(nRows); + const UINT32 cbPolyElement = pkMlKemkey->params.cbPolyElement; + const UINT32 cbVector = pkMlKemkey->params.cbVector; + + SYMCRYPT_ASSERT( pkMlKemkey->hasPrivateSeed ); + SYMCRYPT_ASSERT( (nEta1 == 2) || (nEta1 == 3) ); + SYMCRYPT_ASSERT( cbEncodedVector <= sizeof(pkMlKemkey->encodedT) ); + + pvTmp = SymCryptMlKemVectorCreate( pCompTemps->abVectorBuffer0, cbVector, nRows ); + SYMCRYPT_ASSERT( pvTmp != NULL ); + paTmp = SymCryptMlKemPolyElementAccumulatorCreate( pCompTemps->abPolyElementAccumulatorBuffer, 2*cbPolyElement ); + SYMCRYPT_ASSERT( paTmp != NULL ); + + // (rho || sigma) = G(d || k) + // use CBDSampleBuffer to concatenate the private seed and encoding of nRows + memcpy( CBDSampleBuffer, pkMlKemkey->privateSeed, sizeof(pkMlKemkey->privateSeed) ); + CBDSampleBuffer[sizeof(pkMlKemkey->privateSeed)] = (BYTE) nRows; + SymCryptSha3_512( CBDSampleBuffer, sizeof(pkMlKemkey->privateSeed)+1, privateSeedHash ); + + // copy public seed + memcpy( pkMlKemkey->publicSeed, privateSeedHash, sizeof(pkMlKemkey->publicSeed) ); + + // generate A from public seed + SymCryptMlKemkeyExpandPublicMatrixFromPublicSeed( pkMlKemkey, pCompTemps ); + 
+ // Initialize pShakeStateBase with sigma + SymCryptShake256Init( pShakeStateBase ); + SymCryptShake256Append( pShakeStateBase, privateSeedHash+sizeof(pkMlKemkey->publicSeed), 32 ); + + // Expand s in place + for( i=0; i<nRows; i++ ) + { + CBDSampleBuffer[0] = (BYTE) i; + SymCryptShake256StateCopy( pShakeStateBase, pShakeStateWork ); + SymCryptShake256Append( pShakeStateWork, CBDSampleBuffer, 1 ); + + SymCryptShake256Extract( pShakeStateWork, CBDSampleBuffer, 64ul*nEta1, FALSE ); + + SymCryptMlKemPolyElementSampleCBDFromBytes( CBDSampleBuffer, nEta1, SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT(i, pkMlKemkey->pvs) ); + } + // Expand e in t, ready for multiply-add + for( i=0; i<nRows; i++ ) + { + CBDSampleBuffer[0] = (BYTE) (nRows+i); + SymCryptShake256StateCopy( pShakeStateBase, pShakeStateWork ); + SymCryptShake256Append( pShakeStateWork, CBDSampleBuffer, 1 ); + + SymCryptShake256Extract( pShakeStateWork, CBDSampleBuffer, 64ul*nEta1, FALSE ); + + SymCryptMlKemPolyElementSampleCBDFromBytes( CBDSampleBuffer, nEta1, SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT(i, pkMlKemkey->pvt) ); + } + + // Perform NTT on s and e + SymCryptMlKemVectorNTT( pkMlKemkey->pvs ); + SymCryptMlKemVectorNTT( pkMlKemkey->pvt ); + + // pvTmp = s .* R + SymCryptMlKemVectorMulR( pkMlKemkey->pvs, pvTmp ); + + // t = ((A o (s .* R)) ./ R) + e = A o s + e + SymCryptMlKemMatrixVectorMontMulAndAdd( pkMlKemkey->pmAtranspose, pvTmp, pkMlKemkey->pvt, paTmp ); + + // transpose A + SymCryptMlKemMatrixTranspose( pkMlKemkey->pmAtranspose ); + + // precompute byte-encoding of public vector t + SymCryptMlKemVectorCompressAndEncode( pkMlKemkey->pvt, 12, pkMlKemkey->encodedT, cbEncodedVector ); + + // precompute hash of encapsulation key blob + SymCryptMlKemkeyComputeEncapsulationKeyHash( pkMlKemkey, pCompTemps, cbEncodedVector ); + + // Cleanup! 
+ SymCryptWipeKnownSize( privateSeedHash, sizeof(privateSeedHash) ); + SymCryptWipeKnownSize( CBDSampleBuffer, sizeof(CBDSampleBuffer) ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemkeySetValue( + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_MLKEMKEY_FORMAT mlKemkeyFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PCBYTE pbCurr = pbSrc; + PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES pCompTemps = NULL; + const UINT32 nRows = pkMlKemkey->params.nRows; + const SIZE_T cbEncodedVector = SYMCRYPT_MLKEM_SIZEOF_ENCODED_UNCOMPRESSED_VECTOR( nRows ); + + // Ensure only allowed flags are specified + UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_NO_FIPS | SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION; + + if ( ( flags & ~allowedFlags ) != 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Check that minimal validation flag only specified with no fips + if ( ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) && + ( ( flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION ) != 0 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if( mlKemkeyFormat == SYMCRYPT_MLKEMKEY_FORMAT_NULL ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) + { + // Ensure ML-KEM algorithm selftest is run before first use of ML-KEM algorithms; + // notably _before_ first full KeyGen + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptMlKemSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_MLKEM); + } + + pCompTemps = SymCryptCallbackAlloc( sizeof(SYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES) ); + if( pCompTemps == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + if( mlKemkeyFormat == SYMCRYPT_MLKEMKEY_FORMAT_PRIVATE_SEED ) + { + if( cbSrc != SYMCRYPT_MLKEM_PRIVATE_SEED_SIZE ) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + pkMlKemkey->hasPrivateSeed = TRUE; + memcpy( pkMlKemkey->privateSeed, pbCurr, 
sizeof(pkMlKemkey->privateSeed) ); + pbCurr += sizeof(pkMlKemkey->privateSeed); + + pkMlKemkey->hasPrivateKey = TRUE; + memcpy( pkMlKemkey->privateRandom, pbCurr, sizeof(pkMlKemkey->privateRandom) ); + pbCurr += sizeof(pkMlKemkey->privateRandom); + + SymCryptMlKemkeyExpandFromPrivateSeed( pkMlKemkey, pCompTemps ); + } + else if( mlKemkeyFormat == SYMCRYPT_MLKEMKEY_FORMAT_DECAPSULATION_KEY ) + { + if( cbSrc != SYMCRYPT_MLKEM_SIZEOF_FORMAT_DECAPSULATION_KEY( nRows ) ) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + // decode s + scError = SymCryptMlKemVectorDecodeAndDecompress( pbCurr, cbEncodedVector, 12, pkMlKemkey->pvs ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + pbCurr += cbEncodedVector; + + // copy t and decode t + memcpy( pkMlKemkey->encodedT, pbCurr, cbEncodedVector ); + pbCurr += cbEncodedVector; + scError = SymCryptMlKemVectorDecodeAndDecompress( pkMlKemkey->encodedT, cbEncodedVector, 12, pkMlKemkey->pvt ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // copy public seed and expand public matrix + memcpy( pkMlKemkey->publicSeed, pbCurr, sizeof(pkMlKemkey->publicSeed) ); + pbCurr += sizeof(pkMlKemkey->publicSeed); + SymCryptMlKemkeyExpandPublicMatrixFromPublicSeed( pkMlKemkey, pCompTemps ); + + // transpose A + SymCryptMlKemMatrixTranspose( pkMlKemkey->pmAtranspose ); + + // compute hash of encapsulation key blob + SymCryptMlKemkeyComputeEncapsulationKeyHash( pkMlKemkey, pCompTemps, cbEncodedVector ); + + // check hash of encapsulation key matches hash in the provided blob + if( !SymCryptEqual( pbCurr, pkMlKemkey->encapsKeyHash, sizeof(pkMlKemkey->encapsKeyHash) ) ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + pbCurr += sizeof(pkMlKemkey->encapsKeyHash); + + // copy private random + memcpy( pkMlKemkey->privateRandom, pbCurr, sizeof(pkMlKemkey->privateRandom) ); + pbCurr += sizeof(pkMlKemkey->privateRandom); + + pkMlKemkey->hasPrivateSeed = FALSE; + pkMlKemkey->hasPrivateKey = TRUE; + } 
+ else if( mlKemkeyFormat == SYMCRYPT_MLKEMKEY_FORMAT_ENCAPSULATION_KEY ) + { + if( cbSrc != SYMCRYPT_MLKEM_SIZEOF_FORMAT_ENCAPSULATION_KEY( nRows ) ) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + // copy t and decode t + memcpy( pkMlKemkey->encodedT, pbCurr, cbEncodedVector ); + pbCurr += cbEncodedVector; + scError = SymCryptMlKemVectorDecodeAndDecompress( pkMlKemkey->encodedT, cbEncodedVector, 12, pkMlKemkey->pvt ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // copy public seed and expand public matrix + memcpy( pkMlKemkey->publicSeed, pbCurr, sizeof(pkMlKemkey->publicSeed) ); + pbCurr += sizeof(pkMlKemkey->publicSeed); + SymCryptMlKemkeyExpandPublicMatrixFromPublicSeed( pkMlKemkey, pCompTemps ); + + // transpose A + SymCryptMlKemMatrixTranspose( pkMlKemkey->pmAtranspose ); + + // precompute hash of encapsulation key blob + SymCryptMlKemkeyComputeEncapsulationKeyHash( pkMlKemkey, pCompTemps, cbEncodedVector ); + + pkMlKemkey->hasPrivateSeed = FALSE; + pkMlKemkey->hasPrivateKey = FALSE; + } + else + { + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + + SYMCRYPT_ASSERT( pbCurr == pbSrc + cbSrc ); + +cleanup: + if( pCompTemps != NULL ) + { + SymCryptWipe( pCompTemps, sizeof(*pCompTemps) ); + SymCryptCallbackFree( pCompTemps ); + } + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemkeyGetValue( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_MLKEMKEY_FORMAT mlKemkeyFormat, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbCurr = pbDst; + const UINT32 nRows = pkMlKemkey->params.nRows; + const SIZE_T cbEncodedVector = SYMCRYPT_MLKEM_SIZEOF_ENCODED_UNCOMPRESSED_VECTOR( nRows ); + + UNREFERENCED_PARAMETER( flags ); + + if( mlKemkeyFormat == SYMCRYPT_MLKEMKEY_FORMAT_NULL ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if( mlKemkeyFormat == SYMCRYPT_MLKEMKEY_FORMAT_PRIVATE_SEED ) + { + if( cbDst 
!= SYMCRYPT_MLKEM_PRIVATE_SEED_SIZE ) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + if( !pkMlKemkey->hasPrivateSeed ) + { + scError = SYMCRYPT_INCOMPATIBLE_FORMAT; + goto cleanup; + } + + memcpy( pbCurr, pkMlKemkey->privateSeed, sizeof(pkMlKemkey->privateSeed) ); + pbCurr += sizeof(pkMlKemkey->privateSeed); + + memcpy( pbCurr, pkMlKemkey->privateRandom, sizeof(pkMlKemkey->privateRandom) ); + pbCurr += sizeof(pkMlKemkey->privateRandom); + } + else if( mlKemkeyFormat == SYMCRYPT_MLKEMKEY_FORMAT_DECAPSULATION_KEY ) + { + if( cbDst != SYMCRYPT_MLKEM_SIZEOF_FORMAT_DECAPSULATION_KEY( nRows ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if( !pkMlKemkey->hasPrivateKey ) + { + scError = SYMCRYPT_INCOMPATIBLE_FORMAT; + goto cleanup; + } + + // We don't precompute byte-encoding of private key as exporting decapsulation key is not a critical path operation + // All other fields are kept in memory + SymCryptMlKemVectorCompressAndEncode( pkMlKemkey->pvs, 12, pbCurr, cbEncodedVector ); + pbCurr += cbEncodedVector; + + memcpy( pbCurr, pkMlKemkey->encodedT, cbEncodedVector ); + pbCurr += cbEncodedVector; + + memcpy( pbCurr, pkMlKemkey->publicSeed, sizeof(pkMlKemkey->publicSeed) ); + pbCurr += sizeof(pkMlKemkey->publicSeed); + + memcpy( pbCurr, pkMlKemkey->encapsKeyHash, sizeof(pkMlKemkey->encapsKeyHash) ); + pbCurr += sizeof(pkMlKemkey->encapsKeyHash); + + memcpy( pbCurr, pkMlKemkey->privateRandom, sizeof(pkMlKemkey->privateRandom) ); + pbCurr += sizeof(pkMlKemkey->privateRandom); + } + else if( mlKemkeyFormat == SYMCRYPT_MLKEMKEY_FORMAT_ENCAPSULATION_KEY ) + { + if( cbDst != SYMCRYPT_MLKEM_SIZEOF_FORMAT_ENCAPSULATION_KEY( nRows ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + memcpy( pbCurr, pkMlKemkey->encodedT, cbEncodedVector ); + pbCurr += cbEncodedVector; + + memcpy( pbCurr, pkMlKemkey->publicSeed, sizeof(pkMlKemkey->publicSeed) ); + pbCurr += sizeof(pkMlKemkey->publicSeed); + } + else + { + scError = 
SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + + SYMCRYPT_ASSERT( pbCurr == pbDst + cbDst ); + +cleanup: + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemkeyGenerate( + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + BYTE privateSeed[SYMCRYPT_MLKEM_PRIVATE_SEED_SIZE]; + PBYTE pbPctCipherText = NULL; + SIZE_T cbPctCipherText = 0; + + // Ensure only allowed flags are specified + UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_NO_FIPS; + + if ( ( flags & ~allowedFlags ) != 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + scError = SymCryptCallbackRandom( privateSeed, sizeof(privateSeed) ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + scError = SymCryptMlKemkeySetValue( privateSeed, sizeof(privateSeed), SYMCRYPT_MLKEMKEY_FORMAT_PRIVATE_SEED, flags, pkMlKemkey ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // SymCryptMlKemkeySetValue ensures the self-test is run before + // first operational use of MlKem + + if( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) + { + // PCT on key generation, encaps/decaps and check that both parties get the same shared secret with the generated key + SIZE_T cbU, cbV; + const UINT32 nRows = pkMlKemkey->params.nRows; + const UINT32 nBitsOfU = pkMlKemkey->params.nBitsOfU; + const UINT32 nBitsOfV = pkMlKemkey->params.nBitsOfV; + + // u vector encoded with nBitsOfU * SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS bits per polynomial + cbU = nRows * nBitsOfU * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + // v polynomial encoded with nBitsOfV * SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS bits + cbV = nBitsOfV * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + cbPctCipherText = cbU + cbV; + + pbPctCipherText = SymCryptCallbackAlloc( cbPctCipherText ); + if( pbPctCipherText == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + C_ASSERT( SYMCRYPT_MLKEM_PRIVATE_SEED_SIZE >= 
2*SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET ); + + // reuse bytes 0..31 of privateSeed buffer for encapsulation shared secret + scError = SymCryptMlKemEncapsulate( + pkMlKemkey, + &privateSeed[0], SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET, + pbPctCipherText, cbPctCipherText ); + if( scError != SYMCRYPT_NO_ERROR ) + { + scError = SYMCRYPT_FIPS_FAILURE; + goto cleanup; + } + + // reuse bytes 32..63 of privateSeed buffer for decapsulation shared secret + scError = SymCryptMlKemDecapsulate( + pkMlKemkey, + pbPctCipherText, cbPctCipherText, + &privateSeed[SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET], SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET ); + if( scError != SYMCRYPT_NO_ERROR ) + { + scError = SYMCRYPT_FIPS_FAILURE; + goto cleanup; + } + + if( !SymCryptEqual( &privateSeed[0], &privateSeed[SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET], SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET ) ) + { + // Do not fatal on PCT failure here, as it is expected with very low probability that + // with correct keygen and encaps/decaps, the agreed secrets do not match + scError = SYMCRYPT_FIPS_FAILURE; + goto cleanup; + } + + // could track having run the PCT with a flag in pkMlKemkey->fAlgorithmInfo, + // but currently no need to do that given we don't ever defer the PCT + } + +cleanup: + if( pbPctCipherText != NULL ) + { + // Wiping is not required for security, but has low relative cost + // and better to be on the safe side for FIPS + SymCryptWipe( pbPctCipherText, cbPctCipherText ); + SymCryptCallbackFree( pbPctCipherText ); + } + + SymCryptWipeKnownSize( privateSeed, sizeof(privateSeed) ); + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemEncapsulateInternal( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _Out_writes_bytes_( cbAgreedSecret ) + PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret, + _Out_writes_bytes_( cbCiphertext ) + PBYTE pbCiphertext, + SIZE_T cbCiphertext, + _In_reads_bytes_( SYMCRYPT_MLKEM_SIZEOF_ENCAPS_RANDOM ) + PCBYTE pbRandom, + _Inout_ PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES
pCompTemps ) +{ + BYTE CBDSampleBuffer[3*64 + 1]; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PSYMCRYPT_MLKEM_VECTOR pvrInner; + PSYMCRYPT_MLKEM_VECTOR pvTmp; + PSYMCRYPT_MLKEM_POLYELEMENT peTmp0, peTmp1; + PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paTmp; + PSYMCRYPT_SHA3_512_STATE pHashState = &pCompTemps->hashState0.sha3_512State; + PSYMCRYPT_SHAKE256_STATE pShakeBaseState = &pCompTemps->hashState0.shake256State; + PSYMCRYPT_SHAKE256_STATE pShakeWorkState = &pCompTemps->hashState1.shake256State; + SIZE_T cbU, cbV; + UINT32 i; + const UINT32 nRows = pkMlKemkey->params.nRows; + const UINT32 nBitsOfU = pkMlKemkey->params.nBitsOfU; + const UINT32 nBitsOfV = pkMlKemkey->params.nBitsOfV; + const UINT32 nEta1 = pkMlKemkey->params.nEta1; + const UINT32 nEta2 = pkMlKemkey->params.nEta2; + const UINT32 cbPolyElement = pkMlKemkey->params.cbPolyElement; + const UINT32 cbVector = pkMlKemkey->params.cbVector; + + // u vector encoded with nBitsOfU * SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS bits per polynomial + cbU = nRows * nBitsOfU * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + // v polynomial encoded with nBitsOfV * SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS bits + cbV = nBitsOfV * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + + if( (cbAgreedSecret != SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET) || + (cbCiphertext != cbU + cbV) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pvrInner = SymCryptMlKemVectorCreate( pCompTemps->abVectorBuffer0, cbVector, nRows ); + SYMCRYPT_ASSERT( pvrInner != NULL ); + pvTmp = SymCryptMlKemVectorCreate( pCompTemps->abVectorBuffer1, cbVector, nRows ); + SYMCRYPT_ASSERT( pvTmp != NULL ); + peTmp0 = SymCryptMlKemPolyElementCreate( pCompTemps->abPolyElementBuffer0, cbPolyElement ); + SYMCRYPT_ASSERT( peTmp0 != NULL ); + peTmp1 = SymCryptMlKemPolyElementCreate( pCompTemps->abPolyElementBuffer1, cbPolyElement ); + SYMCRYPT_ASSERT( peTmp1 != NULL ); + paTmp = SymCryptMlKemPolyElementAccumulatorCreate( 
pCompTemps->abPolyElementAccumulatorBuffer, 2*cbPolyElement ); + SYMCRYPT_ASSERT( paTmp != NULL ); + + // CBDSampleBuffer = (K || rOuter) = SHA3-512(pbRandom || encapsKeyHash) + SymCryptSha3_512Init( pHashState ); + SymCryptSha3_512Append( pHashState, pbRandom, SYMCRYPT_MLKEM_SIZEOF_ENCAPS_RANDOM ); + SymCryptSha3_512Append( pHashState, pkMlKemkey->encapsKeyHash, sizeof(pkMlKemkey->encapsKeyHash) ); + SymCryptSha3_512Result( pHashState, CBDSampleBuffer ); + + // Write K to pbAgreedSecret + memcpy( pbAgreedSecret, CBDSampleBuffer, SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET ); + + // Initialize pShakeStateBase with rOuter + SymCryptShake256Init( pShakeBaseState ); + SymCryptShake256Append( pShakeBaseState, CBDSampleBuffer+cbAgreedSecret, 32 ); + + // Expand rInner vector + for( i=0; i<nRows; i++ ) + { + CBDSampleBuffer[0] = (BYTE) i; + SymCryptShake256StateCopy( pShakeBaseState, pShakeWorkState ); + SymCryptShake256Append( pShakeWorkState, CBDSampleBuffer, 1 ); + + SymCryptShake256Extract( pShakeWorkState, CBDSampleBuffer, 64ul*nEta1, FALSE ); + + SymCryptMlKemPolyElementSampleCBDFromBytes( CBDSampleBuffer, nEta1, SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT(i, pvrInner) ); + } + + // Perform NTT on rInner + SymCryptMlKemVectorNTT( pvrInner ); + + // Set pvTmp to 0 + SymCryptMlKemVectorSetZero( pvTmp ); + + // pvTmp = (Atranspose o rInner) ./ R + SymCryptMlKemMatrixVectorMontMulAndAdd( pkMlKemkey->pmAtranspose, pvrInner, pvTmp, paTmp ); + + // pvTmp = INTT(Atranspose o rInner) + SymCryptMlKemVectorINTTAndMulR( pvTmp ); + + // Expand e1 and add it to pvTmp - do addition PolyElement-wise to reduce memory usage + for( i=0; i<nRows; i++ ) + { + CBDSampleBuffer[0] = (BYTE) (nRows+i); + SymCryptShake256StateCopy( pShakeBaseState, pShakeWorkState ); + SymCryptShake256Append( pShakeWorkState, CBDSampleBuffer, 1 ); + + SymCryptShake256Extract( pShakeWorkState, CBDSampleBuffer, 64ul*nEta2, FALSE ); + + SymCryptMlKemPolyElementSampleCBDFromBytes( CBDSampleBuffer, nEta2, peTmp0 ); + + 
SymCryptMlKemPolyElementAdd( SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT(i, pvTmp), peTmp0, SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT(i, pvTmp) ); + } + + // pvTmp = u = INTT(Atranspose o rInner) + e1 + // Compress and encode u into prefix of ciphertext + SymCryptMlKemVectorCompressAndEncode( pvTmp, nBitsOfU, pbCiphertext, cbU ); + + // peTmp0 = (t o r) ./ R + SymCryptMlKemVectorMontDotProduct( pkMlKemkey->pvt, pvrInner, peTmp0, paTmp ); + + // peTmp0 = INTT(t o r) + SymCryptMlKemPolyElementINTTAndMulR( peTmp0 ); + + // Expand e2 polynomial in peTmp1 + CBDSampleBuffer[0] = (BYTE) (2*nRows); + SymCryptShake256StateCopy( pShakeBaseState, pShakeWorkState ); + SymCryptShake256Append( pShakeWorkState, CBDSampleBuffer, 1 ); + + SymCryptShake256Extract( pShakeWorkState, CBDSampleBuffer, 64ul*nEta2, FALSE ); + + SymCryptMlKemPolyElementSampleCBDFromBytes( CBDSampleBuffer, nEta2, peTmp1 ); + + // peTmp = INTT(t o r) + e2 + SymCryptMlKemPolyElementAdd( peTmp0, peTmp1, peTmp0 ); + + // peTmp1 = mu + SymCryptMlKemPolyElementDecodeAndDecompress( pbRandom, 1, peTmp1 ); + + // peTmp0 = v = INTT(t o r) + e2 + mu + SymCryptMlKemPolyElementAdd( peTmp0, peTmp1, peTmp0 ); + + // Compress and encode v into remainder of ciphertext + SymCryptMlKemPolyElementCompressAndEncode( peTmp0, nBitsOfV, pbCiphertext+cbU ); + +cleanup: + SymCryptWipeKnownSize( CBDSampleBuffer, sizeof(CBDSampleBuffer) ); + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemEncapsulateEx( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _In_reads_bytes_( cbRandom ) PCBYTE pbRandom, + SIZE_T cbRandom, + _Out_writes_bytes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret, + _Out_writes_bytes_( cbCiphertext ) PBYTE pbCiphertext, + SIZE_T cbCiphertext ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES pCompTemps = NULL; + + if( cbRandom != SYMCRYPT_MLKEM_SIZEOF_ENCAPS_RANDOM ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pCompTemps 
= SymCryptCallbackAlloc( sizeof(SYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES) ); + if( pCompTemps == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + scError = SymCryptMlKemEncapsulateInternal( + pkMlKemkey, + pbAgreedSecret, cbAgreedSecret, + pbCiphertext, cbCiphertext, + pbRandom, + pCompTemps ); + +cleanup: + if( pCompTemps != NULL ) + { + SymCryptWipe( pCompTemps, sizeof(*pCompTemps) ); + SymCryptCallbackFree( pCompTemps ); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemEncapsulate( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _Out_writes_bytes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret, + _Out_writes_bytes_( cbCiphertext ) PBYTE pbCiphertext, + SIZE_T cbCiphertext ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + BYTE pbm[SYMCRYPT_MLKEM_SIZEOF_ENCAPS_RANDOM]; + + scError = SymCryptCallbackRandom( pbm, sizeof(pbm) ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + scError = SymCryptMlKemEncapsulateEx( + pkMlKemkey, + pbm, sizeof(pbm), + pbAgreedSecret, cbAgreedSecret, + pbCiphertext, cbCiphertext ); + +cleanup: + SymCryptWipeKnownSize( pbm, sizeof(pbm) ); + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemDecapsulate( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _In_reads_bytes_( cbCiphertext ) PCBYTE pbCiphertext, + SIZE_T cbCiphertext, + _Out_writes_bytes_( cbAgreedSecret ) PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret ) +{ + PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES pCompTemps = NULL; + BYTE pbDecryptedRandom[SYMCRYPT_MLKEM_SIZEOF_ENCAPS_RANDOM]; + BYTE pbDecapsulatedSecret[SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET]; + BYTE pbImplicitRejectionSecret[SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET]; + PBYTE pbReadCiphertext, pbReencapsulatedCiphertext; + BOOLEAN successfulReencrypt; + + PBYTE pbCurr; + PBYTE pbAlloc = NULL; + const SIZE_T cbAlloc = sizeof(SYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES) + (2*cbCiphertext); + + SYMCRYPT_ERROR scError = 
SYMCRYPT_NO_ERROR; + SIZE_T cbU, cbV, cbCopy; + PSYMCRYPT_MLKEM_VECTOR pvu; + PSYMCRYPT_MLKEM_POLYELEMENT peTmp0, peTmp1; + PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paTmp; + PSYMCRYPT_SHAKE256_STATE pShakeState; + const UINT32 nRows = pkMlKemkey->params.nRows; + const UINT32 nBitsOfU = pkMlKemkey->params.nBitsOfU; + const UINT32 nBitsOfV = pkMlKemkey->params.nBitsOfV; + const UINT32 cbPolyElement = pkMlKemkey->params.cbPolyElement; + const UINT32 cbVector = pkMlKemkey->params.cbVector; + + // u vector encoded with nBitsOfU * SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS bits per polynomial + cbU = nRows * nBitsOfU * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + // v polynomial encoded with nBitsOfV * SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS bits + cbV = nBitsOfV * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8); + + if( (cbAgreedSecret != SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET) || + (cbCiphertext != cbU + cbV) || + !pkMlKemkey->hasPrivateKey ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pbAlloc = SymCryptCallbackAlloc( cbAlloc ); + if( pbAlloc == NULL ) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + pbCurr = pbAlloc; + + pCompTemps = (PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES) pbCurr; + pbCurr += sizeof(SYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES); + + pbReadCiphertext = pbCurr; + pbCurr += cbCiphertext; + + pbReencapsulatedCiphertext = pbCurr; + pbCurr += cbCiphertext; + + SYMCRYPT_ASSERT( pbCurr == (pbAlloc + cbAlloc) ); + + // Read the input ciphertext once to local pbReadCiphertext to ensure our view of ciphertext consistent + memcpy( pbReadCiphertext, pbCiphertext, cbCiphertext ); + + pvu = SymCryptMlKemVectorCreate( pCompTemps->abVectorBuffer0, cbVector, nRows ); + SYMCRYPT_ASSERT( pvu != NULL ); + peTmp0 = SymCryptMlKemPolyElementCreate( pCompTemps->abPolyElementBuffer0, cbPolyElement ); + SYMCRYPT_ASSERT( peTmp0 != NULL ); + peTmp1 = SymCryptMlKemPolyElementCreate( pCompTemps->abPolyElementBuffer1, cbPolyElement ); 
+ SYMCRYPT_ASSERT( peTmp1 != NULL ); + paTmp = SymCryptMlKemPolyElementAccumulatorCreate( pCompTemps->abPolyElementAccumulatorBuffer, 2*cbPolyElement ); + SYMCRYPT_ASSERT( paTmp != NULL ); + + // Decode and decompress u + scError = SymCryptMlKemVectorDecodeAndDecompress( pbReadCiphertext, cbU, nBitsOfU, pvu ); + SYMCRYPT_ASSERT( scError == SYMCRYPT_NO_ERROR ); + + // Perform NTT on u + SymCryptMlKemVectorNTT( pvu ); + + // peTmp0 = (s o NTT(u)) ./ R + SymCryptMlKemVectorMontDotProduct( pkMlKemkey->pvs, pvu, peTmp0, paTmp ); + + // peTmp0 = INTT(s o NTT(u)) + SymCryptMlKemPolyElementINTTAndMulR( peTmp0 ); + + // Decode and decompress v + scError = SymCryptMlKemPolyElementDecodeAndDecompress( pbReadCiphertext+cbU, nBitsOfV, peTmp1 ); + SYMCRYPT_ASSERT( scError == SYMCRYPT_NO_ERROR ); + + // peTmp0 = w = v - INTT(s o NTT(u)) + SymCryptMlKemPolyElementSub( peTmp1, peTmp0, peTmp0 ); + + // pbDecryptedRandom = m' = Encoding of w + SymCryptMlKemPolyElementCompressAndEncode( peTmp0, 1, pbDecryptedRandom ); + + // Compute: + // pbDecapsulatedSecret = K' = Decapsulated secret (without implicit rejection) + // pbReencapsulatedCiphertext = c' = Ciphertext from re-encapsulating decrypted random value + scError = SymCryptMlKemEncapsulateInternal( + pkMlKemkey, + pbDecapsulatedSecret, sizeof(pbDecapsulatedSecret), + pbReencapsulatedCiphertext, cbCiphertext, + pbDecryptedRandom, + pCompTemps ); + SYMCRYPT_ASSERT( scError == SYMCRYPT_NO_ERROR ); + + // Compute the secret we will return if using implicit rejection + // pbImplicitRejectionSecret = K_bar = SHAKE256( z || c ) + pShakeState = &pCompTemps->hashState0.shake256State; + SymCryptShake256Init( pShakeState ); + SymCryptShake256Append( pShakeState, pkMlKemkey->privateRandom, sizeof(pkMlKemkey->privateRandom) ); + SymCryptShake256Append( pShakeState, pbReadCiphertext, cbCiphertext ); + SymCryptShake256Extract( pShakeState, pbImplicitRejectionSecret, sizeof(pbImplicitRejectionSecret), FALSE ); + + // Constant time test if 
re-encryption successful + successfulReencrypt = SymCryptEqual( pbReencapsulatedCiphertext, pbReadCiphertext, cbCiphertext ); + + // If not successful, perform side-channel-safe copy of Implicit Rejection secret over Decapsulated secret + cbCopy = (((SIZE_T)successfulReencrypt)-1) & SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET; + SymCryptScsCopy( pbImplicitRejectionSecret, cbCopy, pbDecapsulatedSecret, SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET ); + + // Write agreed secret (with implicit rejection) to pbAgreedSecret + memcpy( pbAgreedSecret, pbDecapsulatedSecret, SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET ); + +cleanup: + if( pbAlloc != NULL ) + { + SymCryptWipe( pbAlloc, cbAlloc ); + SymCryptCallbackFree( pbAlloc ); + } + + SymCryptWipeKnownSize( pbDecryptedRandom, sizeof(pbDecryptedRandom) ); + SymCryptWipeKnownSize( pbDecapsulatedSecret, sizeof(pbDecapsulatedSecret) ); + SymCryptWipeKnownSize( pbImplicitRejectionSecret, sizeof(pbImplicitRejectionSecret) ); + + return scError; +} diff --git a/libs/symcrypt/lib/mlkem_primitives.c b/libs/symcrypt/lib/mlkem_primitives.c new file mode 100644 index 00000000000..ab8a6e86c82 --- /dev/null +++ b/libs/symcrypt/lib/mlkem_primitives.c @@ -0,0 +1,1442 @@ +// +// mlkem_primitives.c ML-KEM related functionality +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// Current approach is to represent polynomial ring elements as a 512-byte buffer (256 UINT16s). +// + +// Coefficients are added and subtracted when polynomials are in the NTT domain and in the lattice domain. +// +// Coefficients are only multiplied in the NTT/INTT operations, and in MulAdd which only operates on +// polynomials in NTT form. +// We choose to perform modular multiplication exclusively using Montgomery multiplication, that is, we choose +// a Montgomery divisor R, and modular multiplication always divides by R, as this make reduction logic easy +// and quick. +// i.e. 
MontMul(a,b) -> ((a*b) / R) mod Q +// +// For powers of Zeta used as multiplication twiddle factors in NTT/INTT and base polynomial multiplication, +// we pre-multiply the constants by R s.t. +// MontMul(x, twiddleForZetaToTheK) -> x*(Zeta^K) mod Q. +// +// Most other modular multiplication can be done with a fixup deferred until the INTT. The one exception is in key +// generation, where A o s + e = t, we need to pre-multiply s' + +// R = 2^16 +const UINT32 SYMCRYPT_MLKEM_Rlog2 = 16; +const UINT32 SYMCRYPT_MLKEM_Rmask = 0xffff; + +// NegQInvModR = -Q^(-1) mod R +const UINT32 SYMCRYPT_MLKEM_NegQInvModR = 3327; + +// Rsqr = R^2 mod Q = (1<<32) mod Q +const UINT32 SYMCRYPT_MLKEM_Rsqr = 1353; +// RsqrTimesNegQInvModR = (R^2 mod Q) * -Q^(-1) mod R = (((1<<32) mod Q) * -Q^(-1)) mod R +const UINT32 SYMCRYPT_MLKEM_RsqrTimesNegQInvModR = 44983; + +// +// Zeta tables. +// Zeta = 17, which is a primitive 256-th root of unity modulo Q +// +// In ML-KEM we use powers of zeta to convert to and from NTT form +// and to perform multiplication between polynomials in NTT form +// + +// This table is a lookup for (Zeta^(BitRev(index)) * R) mod Q +// Used in NTT and INTT +// i.e.
element 1 is Zeta^(BitRev(1)) * (2^16) mod Q == (17^64)*(2^16) mod 3329 == 2571 +// +// MlKemZetaBitRevTimesR = [ (pow(17, bitRev(i), 3329) << 16) % 3329 for i in range(128) ] +const UINT16 MlKemZetaBitRevTimesR[128] = +{ + 2285, 2571, 2970, 1812, 1493, 1422, 287, 202, + 3158, 622, 1577, 182, 962, 2127, 1855, 1468, + 573, 2004, 264, 383, 2500, 1458, 1727, 3199, + 2648, 1017, 732, 608, 1787, 411, 3124, 1758, + 1223, 652, 2777, 1015, 2036, 1491, 3047, 1785, + 516, 3321, 3009, 2663, 1711, 2167, 126, 1469, + 2476, 3239, 3058, 830, 107, 1908, 3082, 2378, + 2931, 961, 1821, 2604, 448, 2264, 677, 2054, + 2226, 430, 555, 843, 2078, 871, 1550, 105, + 422, 587, 177, 3094, 3038, 2869, 1574, 1653, + 3083, 778, 1159, 3182, 2552, 1483, 2727, 1119, + 1739, 644, 2457, 349, 418, 329, 3173, 3254, + 817, 1097, 603, 610, 1322, 2044, 1864, 384, + 2114, 3193, 1218, 1994, 2455, 220, 2142, 1670, + 2144, 1799, 2051, 794, 1819, 2475, 2459, 478, + 3221, 3021, 996, 991, 958, 1869, 1522, 1628, +}; + +// This table is a lookup for ((Zeta^(BitRev(index)) * R) mod Q) * -Q^(-1) mod R +// Used in NTT and INTT +// +// MlKemZetaBitRevTimesRTimesNegQInvModR = [ (((pow(17, bitRev(i), Q) << 16) % Q) * 3327) & 0xffff for i in range(128) ] +const UINT16 MlKemZetaBitRevTimesRTimesNegQInvModR[128] = +{ + 19, 34037, 50790, 64748, 52011, 12402, 37345, 16694, + 20906, 37778, 3799, 15690, 54846, 64177, 11201, 34372, + 5827, 48172, 26360, 29057, 59964, 1102, 44097, 26241, + 28072, 41223, 10532, 56736, 47109, 56677, 38860, 16162, + 5689, 6516, 64039, 34569, 23564, 45357, 44825, 40455, + 12796, 38919, 49471, 12441, 56401, 649, 25986, 37699, + 45652, 28249, 15886, 8898, 28309, 56460, 30198, 47286, + 52109, 51519, 29155, 12756, 48704, 61224, 24155, 17914, + 334, 54354, 11477, 52149, 32226, 14233, 45042, 21655, + 27738, 52405, 64591, 4586, 14882, 42443, 59354, 60043, + 33525, 32502, 54905, 35218, 36360, 18741, 28761, 52897, + 18485, 45436, 47975, 47011, 14430, 46007, 5275, 12618, + 31183, 45239, 40101, 63390, 7382, 
50180, 41144, 32384, + 20926, 6279, 54590, 14902, 41321, 11044, 48546, 51066, + 55200, 21497, 7933, 20198, 22501, 42325, 54629, 17442, + 33899, 23859, 36892, 20257, 41538, 57779, 17422, 42404, +}; + +// This table is a lookup for ((Zeta^(2*BitRev(index) + 1) * R) mod Q) +// Used in multiplication of 2 NTT-form polynomials +// +// zetaTwoTimesBitRevPlus1TimesR = [ (pow(17, 2*bitRev(i)+1, 3329) << 16) % 3329 for i in range(128) ] +const UINT16 zetaTwoTimesBitRevPlus1TimesR[128] = +{ + 2226, 1103, 430, 2899, 555, 2774, 843, 2486, + 2078, 1251, 871, 2458, 1550, 1779, 105, 3224, + 422, 2907, 587, 2742, 177, 3152, 3094, 235, + 3038, 291, 2869, 460, 1574, 1755, 1653, 1676, + 3083, 246, 778, 2551, 1159, 2170, 3182, 147, + 2552, 777, 1483, 1846, 2727, 602, 1119, 2210, + 1739, 1590, 644, 2685, 2457, 872, 349, 2980, + 418, 2911, 329, 3000, 3173, 156, 3254, 75, + 817, 2512, 1097, 2232, 603, 2726, 610, 2719, + 1322, 2007, 2044, 1285, 1864, 1465, 384, 2945, + 2114, 1215, 3193, 136, 1218, 2111, 1994, 1335, + 2455, 874, 220, 3109, 2142, 1187, 1670, 1659, + 2144, 1185, 1799, 1530, 2051, 1278, 794, 2535, + 1819, 1510, 2475, 854, 2459, 870, 478, 2851, + 3221, 108, 3021, 308, 996, 2333, 991, 2338, + 958, 2371, 1869, 1460, 1522, 1807, 1628, 1701, +}; + +PSYMCRYPT_MLKEM_POLYELEMENT +SYMCRYPT_CALL +SymCryptMlKemPolyElementCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer ) +{ + PSYMCRYPT_MLKEM_POLYELEMENT pDst = (PSYMCRYPT_MLKEM_POLYELEMENT) pbBuffer; + + UNREFERENCED_PARAMETER( cbBuffer ); + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + SYMCRYPT_ASSERT( cbBuffer == SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT ); + + return pDst; +} + +PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR +SYMCRYPT_CALL +SymCryptMlKemPolyElementAccumulatorCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer ) +{ + PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR pDst = (PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR) pbBuffer; + + UNREFERENCED_PARAMETER( cbBuffer ); + + 
SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + SYMCRYPT_ASSERT( cbBuffer == SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT_ACCUMULATOR ); + + return pDst; +} + +PSYMCRYPT_MLKEM_VECTOR +SYMCRYPT_CALL +SymCryptMlKemVectorCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer, + UINT32 nRows ) +{ + PSYMCRYPT_MLKEM_VECTOR pDst = NULL; + PSYMCRYPT_MLKEM_VECTOR pVector = (PSYMCRYPT_MLKEM_VECTOR)pbBuffer; + PSYMCRYPT_MLKEM_POLYELEMENT peTmp = NULL; + UINT32 i; + PBYTE pbTmp = pbBuffer + sizeof(SYMCRYPT_MLKEM_VECTOR); + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + + pVector->nRows = nRows; + pVector->cbTotalSize = cbBuffer; + + for( i=0; i<nRows; i++ ) + { + peTmp = SymCryptMlKemPolyElementCreate( pbTmp, SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT ); + if( peTmp == NULL ) + { + goto cleanup; + } + + pbTmp += SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT; + } + + SYMCRYPT_ASSERT( pbTmp == (pbBuffer + cbBuffer) ); + + pDst = pVector; + +cleanup: + return pDst; +} + +PSYMCRYPT_MLKEM_MATRIX +SYMCRYPT_CALL +SymCryptMlKemMatrixCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer, + UINT32 nRows ) +{ + PSYMCRYPT_MLKEM_MATRIX pDst = NULL; + PSYMCRYPT_MLKEM_MATRIX pMatrix = (PSYMCRYPT_MLKEM_MATRIX)pbBuffer; + UINT32 i; + PBYTE pbTmp = pbBuffer + sizeof(SYMCRYPT_MLKEM_MATRIX); + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbBuffer ); + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + + pMatrix->nRows = nRows; + pMatrix->cbTotalSize = cbBuffer; + + for( i=0; i<(nRows*nRows); i++ ) + { + pMatrix->apPolyElements[i] = SymCryptMlKemPolyElementCreate( pbTmp, SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT ); + if( pMatrix->apPolyElements[i] == NULL ) + { + goto cleanup; + } + + pbTmp += SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT; + } + + SYMCRYPT_ASSERT( pbTmp == (pbBuffer + cbBuffer) ); + + pDst = pMatrix; + 
+cleanup: + return pDst; +} + +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM64 + +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("sse2"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("sse2") +#endif + +#define VEC128_TYPE_UINT16 __m128i + +#define VEC128_LOAD_UINT16( addr ) _mm_loadu_si128( (__m128i*) (addr) ) +#define VEC64_LOAD_UINT16( addr ) _mm_loadu_si64( (PBYTE) (addr) ) +#define VEC32_LOAD_UINT16( addr ) _mm_cvtsi32_si128( SYMCRYPT_LOAD_LSBFIRST32( addr ) ) + +#define VEC128_STORE_UINT16( addr, vec ) _mm_storeu_si128( (__m128i*) (addr), (vec) ) +#define VEC64_STORE_UINT16( addr, vec ) _mm_storeu_si64( (PBYTE) (addr), (vec) ) +#define VEC32_STORE_UINT16( addr, vec ) SYMCRYPT_STORE_LSBFIRST32( (addr), _mm_cvtsi128_si32( vec ) ) + +#define VEC128_SET_UINT16( value ) _mm_set1_epi16( (value) ) + +#define VEC128_MOD_SUB_UINT16( res, a, b, Q, zero, tmp1 ) \ + /* res = a - b */ \ + res = _mm_sub_epi16( a, b ); \ + /* tmp1 = (a - b) < 0 ? -1 : 0 */ \ + tmp1 = _mm_cmpgt_epi16( zero, res ); \ + /* tmp1 = (a - b) < 0 ? Q : 0 */ \ + tmp1 = _mm_and_si128( tmp1, Q ); \ + /* res = (a - b) mod Q */ \ + res = _mm_add_epi16( res, tmp1 ); + +#define VEC128_MOD_ADD_UINT16( res, a, b, Q, tmp1 ) \ + /* res = a + b */ \ + res = _mm_add_epi16( a, b ); \ + /* tmp1 = (a + b) < Q ? -1 : 0 */ \ + tmp1 = _mm_cmpgt_epi16( Q, res ); \ + /* tmp1 = (a + b) < Q ? 0 : Q */ \ + tmp1 = _mm_andnot_si128( tmp1, Q ); \ + /* res = (a + b) mod Q */ \ + res = _mm_sub_epi16( res, tmp1 ); + +#define VEC128_MONTGOMERY_MUL_UINT16( res, a, b, bTimesNegQInvModR, Q, zero, one, tmp1, tmp2 ) \ + /* tmp1 = a *low bTimesNegQInvModR */ \ + tmp1 = _mm_mullo_epi16( a, bTimesNegQInvModR ); \ + /* res = a *high b */ \ + res = _mm_mulhi_epu16( a, b ); \ + /* tmp2 = (tmp1 == 0) ? 
-1 : 0 */ \ + tmp2 = _mm_cmpeq_epi16( tmp1, zero ); \ + /* tmp1 = (a *low bTimesNegQInvModR) *high Q */ \ + tmp1 = _mm_mulhi_epu16( tmp1, Q ); \ + /* res = a *high b + 1 */ \ + res = _mm_add_epi16( res, one ); \ + /* res = a *high b (+ 1 if a != 0) */ \ + res = _mm_add_epi16( res, tmp2 ); \ + /* res = a *high b + inv*Q (+ 1 if a != 0) */ \ + res = _mm_add_epi16( res, tmp1 ); \ + /* res = (a*b + inv*Q >> 16) mod Q */ \ + VEC128_MOD_SUB_UINT16( res, res, Q, Q, zero, tmp1 ); + +#elif SYMCRYPT_CPU_ARM64 + +#define VEC128_TYPE_UINT16 uint16x8_t + +#define VEC128_LOAD_UINT16( addr ) vld1q_u16( addr ) +#define VEC64_LOAD_UINT16( addr ) vld1q_dup_u64( addr ) +#define VEC32_LOAD_UINT16( addr ) vld1q_dup_u32( addr ) + +#define VEC128_STORE_UINT16( addr, vec ) vst1q_u16( (addr), (vec) ) +#define VEC64_STORE_UINT16( addr, vec ) vst1_u16( (uint16_t*) (addr), vget_low_u16(vec) ) +#define VEC32_STORE_UINT16( addr, vec ) vst1_lane_u32( (PBYTE) (addr), vget_low_u32(vec), 0 ) + +#define VEC128_SET_UINT16( value ) vdupq_n_u16( (value) ) + +#define VEC128_MOD_SUB_UINT16( res, a, b, Q, zero, tmp1 ) \ + /* res = a - b */ \ + res = vsubq_u16( a, b ); \ + /* tmp1 = (a - b) < 0 ? -1 : 0 */ \ + tmp1 = vcltzq_s16( res ); \ + /* tmp1 = (a - b) < 0 ? Q : 0 */ \ + tmp1 = vandq_u16( tmp1, Q ); \ + /* res = (a - b) mod Q */ \ + res = vaddq_u16( res, tmp1 ); + +#define VEC128_MOD_ADD_UINT16( res, a, b, Q, tmp1 ) \ + /* res = a + b */ \ + res = vaddq_u16( a, b ); \ + /* tmp1 = (a + b) >= Q ? -1 : 0 */ \ + tmp1 = vcgeq_u16( res, Q ); \ + /* tmp1 = (a + b) >= Q ? 
Q : 0 */ \ + tmp1 = vandq_u16( tmp1, Q ); \ + /* res = (a + b) mod Q */ \ + res = vsubq_u16( res, tmp1 ); + +#define VEC128_MONTGOMERY_MUL_UINT16( res, a, b, bTimesNegQInvModR, Q, zero, one, tmp1, tmp2 ) \ + /* tmp1 = a *low bTimesNegQInvModR */ \ + tmp1 = vmulq_u16( a, bTimesNegQInvModR ); \ + /* tmp2 = a*b [0-3]*/ \ + tmp2 = vmull_u16( vget_low_u16(a), vget_low_u16(b) ); \ + /* res = a*b [4-7]*/ \ + res = vmull_high_u16( a, b ); \ + /* tmp2 = a*b + inv*Q [0-3]*/ \ + tmp2 = vmlal_u16( tmp2, vget_low_u16(tmp1), vget_low_u16(Q) ); \ + /* res = a*b + inv*Q [4-7]*/ \ + res = vmlal_high_u16( res, tmp1, Q ); \ + /* res = a*b + inv*Q >> 16 */ \ + res = vuzp2q_u16( tmp2, res ); \ + /* res = (a*b + inv*Q >> 16) mod Q */ \ + VEC128_MOD_SUB_UINT16( res, res, Q, Q, zero, tmp1 ); + +#endif + +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementNTTLayerVec128( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc, + UINT32 k, + UINT32 len ) +{ + UINT32 start, j; + VEC128_TYPE_UINT16 vc0, vc1, vTmp0, vTmp1, vc1Twiddle, vTwiddleFactor, vTwiddleFactorMont, vQ, vZero, vOne; + + SYMCRYPT_ASSERT( len >= 2 ); + + vQ = VEC128_SET_UINT16( SYMCRYPT_MLKEM_Q ); + vZero = VEC128_SET_UINT16( 0 ); + vOne = VEC128_SET_UINT16( 1 ); + + for( start=0; start<256; start+=(2*len) ) + { + vTwiddleFactor = VEC128_SET_UINT16( MlKemZetaBitRevTimesR[k] ); + vTwiddleFactorMont = VEC128_SET_UINT16( MlKemZetaBitRevTimesRTimesNegQInvModR[k] ); + k++; + for( j=0; j<len; j+=8 ) + { + if( len >= 8 ) + { + vc0 = VEC128_LOAD_UINT16( &(peSrc->coeffs[start+j] ) ); + vc1 = VEC128_LOAD_UINT16( &(peSrc->coeffs[start+j+len]) ); + } + else if ( len == 4 ) + { + vc0 = VEC64_LOAD_UINT16( &(peSrc->coeffs[start+j] ) ); + vc1 = VEC64_LOAD_UINT16( &(peSrc->coeffs[start+j+len]) ); + } + else /*if ( len == 2 )*/ + { + vc0 = VEC32_LOAD_UINT16( &(peSrc->coeffs[start+j] ) ); + vc1 = VEC32_LOAD_UINT16( &(peSrc->coeffs[start+j+len]) ); + } + + // c1TimesTwiddle = twiddleFactor * c1 mod Q; + VEC128_MONTGOMERY_MUL_UINT16( vc1Twiddle, vc1, 
vTwiddleFactor, vTwiddleFactorMont, vQ, vZero, vOne, vTmp0, vTmp1 ); + // c1 = c0 - c1TimesTwiddle mod Q + VEC128_MOD_SUB_UINT16( vc1, vc0, vc1Twiddle, vQ, vZero, vTmp0 ); + // c0 = c0 + c1TimesTwiddle mod Q + VEC128_MOD_ADD_UINT16( vc0, vc0, vc1Twiddle, vQ, vTmp1 ); + + if( len >= 8 ) + { + VEC128_STORE_UINT16( &(peSrc->coeffs[start+j] ), vc0 ); + VEC128_STORE_UINT16( &(peSrc->coeffs[start+j+len]), vc1 ); + } + else if ( len == 4 ) + { + VEC64_STORE_UINT16( &(peSrc->coeffs[start+j] ), vc0 ); + VEC64_STORE_UINT16( &(peSrc->coeffs[start+j+len]), vc1 ); + } + else /*if ( len == 2 )*/ + { + VEC32_STORE_UINT16( &(peSrc->coeffs[start+j] ), vc0 ); + VEC32_STORE_UINT16( &(peSrc->coeffs[start+j+len]), vc1 ); + } + } + } +} + +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementINTTLayerVec128( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc, + UINT32 k, + UINT32 len ) +{ + UINT32 start, j; + VEC128_TYPE_UINT16 vc0, vc1, vTmp0, vTmp1, vTmp2, vTwiddleFactor, vTwiddleFactorMont, vQ, vZero, vOne; + + SYMCRYPT_ASSERT( len >= 2 ); + + vQ = VEC128_SET_UINT16( SYMCRYPT_MLKEM_Q ); + vZero = VEC128_SET_UINT16( 0 ); + vOne = VEC128_SET_UINT16( 1 ); + + for( start=0; start<256; start+=(2*len) ) + { + vTwiddleFactor = VEC128_SET_UINT16( MlKemZetaBitRevTimesR[k] ); + vTwiddleFactorMont = VEC128_SET_UINT16( MlKemZetaBitRevTimesRTimesNegQInvModR[k] ); + k--; + for( j=0; j<len; j+=8 ) + { + if( len >= 8 ) + { + vc0 = VEC128_LOAD_UINT16( &(peSrc->coeffs[start+j] ) ); + vc1 = VEC128_LOAD_UINT16( &(peSrc->coeffs[start+j+len]) ); + } + else if ( len == 4 ) + { + vc0 = VEC64_LOAD_UINT16( &(peSrc->coeffs[start+j] ) ); + vc1 = VEC64_LOAD_UINT16( &(peSrc->coeffs[start+j+len]) ); + } + else /*if ( len == 2 )*/ + { + vc0 = VEC32_LOAD_UINT16( &(peSrc->coeffs[start+j] ) ); + vc1 = VEC32_LOAD_UINT16( &(peSrc->coeffs[start+j+len]) ); + } + + // tmp = c0 + c1 mod Q + VEC128_MOD_ADD_UINT16( vTmp2, vc0, vc1, vQ, vTmp0 ); + // c1 = c1 - c0 mod Q + VEC128_MOD_SUB_UINT16( vc1, vc1, vc0, vQ, vZero, vTmp1 ); 
+ // c1 = twiddleFactor * c1; + VEC128_MONTGOMERY_MUL_UINT16( vc1, vc1, vTwiddleFactor, vTwiddleFactorMont, vQ, vZero, vOne, vTmp0, vTmp1 ); + + if( len >= 8 ) + { + VEC128_STORE_UINT16( &(peSrc->coeffs[start+j] ), vTmp2 ); + VEC128_STORE_UINT16( &(peSrc->coeffs[start+j+len]), vc1 ); + } + else if ( len == 4 ) + { + VEC64_STORE_UINT16( &(peSrc->coeffs[start+j] ), vTmp2 ); + VEC64_STORE_UINT16( &(peSrc->coeffs[start+j+len]), vc1 ); + } + else /*if ( len == 2 )*/ + { + VEC32_STORE_UINT16( &(peSrc->coeffs[start+j] ), vTmp2 ); + VEC32_STORE_UINT16( &(peSrc->coeffs[start+j+len]), vc1 ); + } + } + } +} + +#endif + +FORCEINLINE +UINT32 +SYMCRYPT_CALL +SymCryptMlKemModAdd( + UINT32 a, + UINT32 b ) +{ + UINT32 res; + + SYMCRYPT_ASSERT( a < SYMCRYPT_MLKEM_Q ); + SYMCRYPT_ASSERT( b < SYMCRYPT_MLKEM_Q ); + + res = a + b - SYMCRYPT_MLKEM_Q; + SYMCRYPT_ASSERT( ((res >> 16) == 0) || ((res >> 16) == 0xffff) ); + res = res + (SYMCRYPT_MLKEM_Q & (res >> 16)); + SYMCRYPT_ASSERT( res < SYMCRYPT_MLKEM_Q ); + + return res; +} + +FORCEINLINE +UINT32 +SYMCRYPT_CALL +SymCryptMlKemModSub( + UINT32 a, + UINT32 b ) +{ + UINT32 res; + + SYMCRYPT_ASSERT( a < 2*SYMCRYPT_MLKEM_Q ); + SYMCRYPT_ASSERT( b <= SYMCRYPT_MLKEM_Q ); + + res = a - b; + SYMCRYPT_ASSERT( ((res >> 16) == 0) || ((res >> 16) == 0xffff) ); + res = res + (SYMCRYPT_MLKEM_Q & (res >> 16)); + SYMCRYPT_ASSERT( res < SYMCRYPT_MLKEM_Q ); + + return res; +} + +FORCEINLINE +UINT32 +SYMCRYPT_CALL +SymCryptMlKemMontMul( + UINT32 a, + UINT32 b, + UINT32 bMont ) +{ + UINT32 res, inv; + + SYMCRYPT_ASSERT( a < SYMCRYPT_MLKEM_Q ); + SYMCRYPT_ASSERT( b < SYMCRYPT_MLKEM_Q ); + SYMCRYPT_ASSERT( bMont <= SYMCRYPT_MLKEM_Rmask ); + SYMCRYPT_ASSERT( bMont == ((b * SYMCRYPT_MLKEM_NegQInvModR) & SYMCRYPT_MLKEM_Rmask) ); + + res = a * b; + inv = (a * bMont) & SYMCRYPT_MLKEM_Rmask; + res += inv * SYMCRYPT_MLKEM_Q; + SYMCRYPT_ASSERT( (res & SYMCRYPT_MLKEM_Rmask) == 0 ); + res = res >> SYMCRYPT_MLKEM_Rlog2; + + return SymCryptMlKemModSub( res, 
SYMCRYPT_MLKEM_Q ); +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementNTTLayerC( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc, + UINT32 k, + UINT32 len ) +{ + UINT32 start, j; + UINT32 twiddleFactor, twiddleFactorMont, c0, c1, c1TimesTwiddle; + + for( start=0; start<256; start+=(2*len) ) + { + twiddleFactor = MlKemZetaBitRevTimesR[k]; + twiddleFactorMont = MlKemZetaBitRevTimesRTimesNegQInvModR[k]; + k++; + for( j=0; j<len; j++ ) + { + c0 = peSrc->coeffs[start+j]; + SYMCRYPT_ASSERT( c0 < SYMCRYPT_MLKEM_Q ); + c1 = peSrc->coeffs[start+j+len]; + SYMCRYPT_ASSERT( c1 < SYMCRYPT_MLKEM_Q ); + + c1TimesTwiddle = SymCryptMlKemMontMul( c1, twiddleFactor, twiddleFactorMont ); + c1 = SymCryptMlKemModSub( c0, c1TimesTwiddle ); + c0 = SymCryptMlKemModAdd( c0, c1TimesTwiddle ); + + peSrc->coeffs[start+j] = (UINT16) c0; + peSrc->coeffs[start+j+len] = (UINT16) c1; + } + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementINTTLayerC( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc, + UINT32 k, + UINT32 len ) +{ + UINT32 start, j; + UINT32 twiddleFactor, twiddleFactorMont, c0, c1, tmp; + + for( start=0; start<256; start+=(2*len) ) + { + twiddleFactor = MlKemZetaBitRevTimesR[k]; + twiddleFactorMont = MlKemZetaBitRevTimesRTimesNegQInvModR[k]; + k--; + for( j=0; j<len; j++ ) + { + c0 = peSrc->coeffs[start+j]; + SYMCRYPT_ASSERT( c0 < SYMCRYPT_MLKEM_Q ); + c1 = peSrc->coeffs[start+j+len]; + SYMCRYPT_ASSERT( c1 < SYMCRYPT_MLKEM_Q ); + + tmp = SymCryptMlKemModAdd( c0, c1 ); + c1 = SymCryptMlKemModSub( c1, c0 ); + c1 = SymCryptMlKemMontMul( c1, twiddleFactor, twiddleFactorMont ); + + peSrc->coeffs[start+j] = (UINT16) tmp; + peSrc->coeffs[start+j+len] = (UINT16) c1; + } + } +} + +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementNTTLayer( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc, + UINT32 k, + UINT32 len ) +{ +#if SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSE2 ) && SymCryptSaveXmm( &SaveData ) == 
SYMCRYPT_NO_ERROR ) + { + SymCryptMlKemPolyElementNTTLayerVec128( peSrc, k, len ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptMlKemPolyElementNTTLayerC( peSrc, k, len ); + } +#elif SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSE2 ) ) + { + SymCryptMlKemPolyElementNTTLayerVec128( peSrc, k, len ); + } else { + SymCryptMlKemPolyElementNTTLayerC( peSrc, k, len ); + } +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON ) ) + { + SymCryptMlKemPolyElementNTTLayerVec128( peSrc, k, len ); + } else { + SymCryptMlKemPolyElementNTTLayerC( peSrc, k, len ); + } +#else + SymCryptMlKemPolyElementNTTLayerC( peSrc, k, len ); +#endif +} + +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementINTTLayer( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc, + UINT32 k, + UINT32 len ) +{ +#if SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSE2 ) && SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptMlKemPolyElementINTTLayerVec128( peSrc, k, len ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptMlKemPolyElementINTTLayerC( peSrc, k, len ); + } +#elif SYMCRYPT_CPU_AMD64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSE2 ) ) + { + SymCryptMlKemPolyElementINTTLayerVec128( peSrc, k, len ); + } else { + SymCryptMlKemPolyElementINTTLayerC( peSrc, k, len ); + } +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON ) ) + { + SymCryptMlKemPolyElementINTTLayerVec128( peSrc, k, len ); + } else { + SymCryptMlKemPolyElementINTTLayerC( peSrc, k, len ); + } +#else + SymCryptMlKemPolyElementINTTLayerC( peSrc, k, len ); +#endif +} + +#define SYMCRYPT_MLKEM_MaxCoeff (SYMCRYPT_MLKEM_Q - 1) +#define SYMCRYPT_MLKEM_MaxCoeffProduct (SYMCRYPT_MLKEM_MaxCoeff*SYMCRYPT_MLKEM_MaxCoeff) + +// max([ ((i*j) + ((((i*j)*NegQInvModR) & Rmask)*Q)) >> Rlog2 for i in range(Q) for j in range(Q) ]) +#define 
SYMCRYPT_MLKEM_MaxFirstStepReduction (3494) +// max([ ( pow(17, (2*i)+1, Q) << Rlog2 ) % Q for i in range(128) ]) +#define SYMCRYPT_MLKEM_MaxZetaTwoTimesPlus1TimesR (3254) +#define SYMCRYPT_MLKEM_MaxA1B1ZetaPow (SYMCRYPT_MLKEM_MaxFirstStepReduction*SYMCRYPT_MLKEM_MaxZetaTwoTimesPlus1TimesR) + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementMulAndAccumulate( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc2, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paDst ) +{ + UINT32 i; + UINT32 a0, a1, b0, b1, c0, c1; + UINT32 a0b0, a1b1, a0b1, a1b0, a1b1zetapow, inv; + + for( i=0; i<(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 2); i++ ) + { + a0 = peSrc1->coeffs[(2*i) ]; + SYMCRYPT_ASSERT( a0 < SYMCRYPT_MLKEM_Q ); + a1 = peSrc1->coeffs[(2*i)+1]; + SYMCRYPT_ASSERT( a1 < SYMCRYPT_MLKEM_Q ); + + b0 = peSrc2->coeffs[(2*i) ]; + SYMCRYPT_ASSERT( b0 < SYMCRYPT_MLKEM_Q ); + b1 = peSrc2->coeffs[(2*i)+1]; + SYMCRYPT_ASSERT( b1 < SYMCRYPT_MLKEM_Q ); + + c0 = paDst->coeffs[(2*i) ]; + SYMCRYPT_ASSERT( c0 <= 3*(SYMCRYPT_MLKEM_MaxCoeffProduct + SYMCRYPT_MLKEM_MaxA1B1ZetaPow) ); + c1 = paDst->coeffs[(2*i)+1]; + SYMCRYPT_ASSERT( c1 <= 3*(SYMCRYPT_MLKEM_MaxCoeffProduct + SYMCRYPT_MLKEM_MaxA1B1ZetaPow) ); + + // multiplication results in range [0, MaxCoeffProduct = 3328*3328] + a0b0 = a0 * b0; + a1b1 = a1 * b1; + a0b1 = a0 * b1; + a1b0 = a1 * b0; + + // we need a1*b1*zetaTwoTimesBitRevPlus1TimesR[i] + // eagerly reduce a1*b1 with montgomery reduction + // a1b1 = red(a1*b1) -> range [0, MaxFirstStepReduction = 3494] + // (3494 is maximum result of first step of montgomery reduction of x*y for x,y in [0, 3328]) + // we do not need to do final reduction yet + inv = (a1b1 * SYMCRYPT_MLKEM_NegQInvModR) & SYMCRYPT_MLKEM_Rmask; + a1b1 = (a1b1 + (inv * SYMCRYPT_MLKEM_Q)) >> SYMCRYPT_MLKEM_Rlog2; // in range [0, MaxFirstStepReduction] + SYMCRYPT_ASSERT( a1b1 <= SYMCRYPT_MLKEM_MaxFirstStepReduction ); + + // now multiply a1b1 by power of zeta + a1b1zetapow = a1b1 * 
zetaTwoTimesBitRevPlus1TimesR[i]; + // MaxZetaTwoTimesPlus1TimesR = 3254 + // MaxA1B1ZetaPow = MaxFirstStepReduction*MaxZetaTwoTimesPlus1TimesR = 3494*3254 + SYMCRYPT_ASSERT( a1b1zetapow <= SYMCRYPT_MLKEM_MaxA1B1ZetaPow ); + + // sum pairs of products + a0b0 += a1b1zetapow; // a0*b0 + red(a1*b1)*zetapower in range [0, MaxCoeffProduct + MaxA1B1ZetaPow] + SYMCRYPT_ASSERT( a0b0 <= SYMCRYPT_MLKEM_MaxCoeffProduct + SYMCRYPT_MLKEM_MaxA1B1ZetaPow ); + a0b1 += a1b0; // a0*b1 + a1*b0 in range [0, 2*MaxCoeffProduct] + SYMCRYPT_ASSERT( a0b1 <= 2*SYMCRYPT_MLKEM_MaxCoeffProduct ); + + // We sum at most 4 pairs of products into an accumulator in ML-KEM + C_ASSERT( SYMCRYPT_MLKEM_MATRIX_MAX_NROWS <= 4 ); + c0 += a0b0; // in range [0,4*MaxCoeffProduct + 4*MaxA1B1ZetaPow] + SYMCRYPT_ASSERT( c0 < (4*SYMCRYPT_MLKEM_MaxCoeffProduct) + (4*SYMCRYPT_MLKEM_MaxA1B1ZetaPow) ); + c1 += a0b1; // in range [0,5*MaxCoeffProduct + 3*MaxA1B1ZetaPow] + SYMCRYPT_ASSERT( c1 < (5*SYMCRYPT_MLKEM_MaxCoeffProduct) + (3*SYMCRYPT_MLKEM_MaxA1B1ZetaPow) ); + + paDst->coeffs[(2*i) ] = c0; + paDst->coeffs[(2*i)+1] = c1; + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemMontgomeryReduceAndAddPolyElementAccumulatorToPolyElement( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paSrc, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ) +{ + UINT32 i; + UINT32 a, c, inv; + + for( i=0; i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++ ) + { + a = paSrc->coeffs[i]; + SYMCRYPT_ASSERT( a <= 4*(SYMCRYPT_MLKEM_MaxCoeffProduct + SYMCRYPT_MLKEM_MaxA1B1ZetaPow) ); + paSrc->coeffs[i] = 0; + + c = peDst->coeffs[i]; + SYMCRYPT_ASSERT( c < SYMCRYPT_MLKEM_Q ); + + // montgomery reduce sum of products + inv = (a * SYMCRYPT_MLKEM_NegQInvModR) & SYMCRYPT_MLKEM_Rmask; + a = (a + (inv * SYMCRYPT_MLKEM_Q)) >> SYMCRYPT_MLKEM_Rlog2; // in range [0, 4711] + SYMCRYPT_ASSERT( a <= 4711 ); + + // add destination + c += a; + SYMCRYPT_ASSERT( c <= 8039 ); + + // subtraction and conditional additions for constant time range reduction + c -= 
2*SYMCRYPT_MLKEM_Q; // in range [-2Q, 1381] + SYMCRYPT_ASSERT( (c >= ((UINT32)(-2*SYMCRYPT_MLKEM_Q))) || (c < 1381) ); + c += SYMCRYPT_MLKEM_Q & (c >> 16); // in range [-Q, Q-1] + SYMCRYPT_ASSERT( (c >= ((UINT32)-SYMCRYPT_MLKEM_Q)) || (c < SYMCRYPT_MLKEM_Q) ); + c += SYMCRYPT_MLKEM_Q & (c >> 16); // in range [0, Q-1] + SYMCRYPT_ASSERT( c < SYMCRYPT_MLKEM_Q ); + + peDst->coeffs[i] = (UINT16) c; + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementMulR( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ) +{ + UINT32 i; + for( i=0; i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++ ) + { + peDst->coeffs[i] = (UINT16) SymCryptMlKemMontMul( + peSrc->coeffs[i], SYMCRYPT_MLKEM_Rsqr, SYMCRYPT_MLKEM_RsqrTimesNegQInvModR ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementAdd( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc2, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ) +{ + UINT32 i; + for( i=0; i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++ ) + { + peDst->coeffs[i] = (UINT16) SymCryptMlKemModAdd( peSrc1->coeffs[i], peSrc2->coeffs[i] ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementSub( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc2, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ) +{ + UINT32 i; + for( i=0; i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++ ) + { + peDst->coeffs[i] = (UINT16) SymCryptMlKemModSub( peSrc1->coeffs[i], peSrc2->coeffs[i] ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementNTT( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc ) +{ + SymCryptMlKemPolyElementNTTLayer( peSrc, 1, 128 ); + SymCryptMlKemPolyElementNTTLayer( peSrc, 2, 64 ); + SymCryptMlKemPolyElementNTTLayer( peSrc, 4, 32 ); + SymCryptMlKemPolyElementNTTLayer( peSrc, 8, 16 ); + SymCryptMlKemPolyElementNTTLayer( peSrc, 16, 8 ); + SymCryptMlKemPolyElementNTTLayer( peSrc, 32, 4 ); + SymCryptMlKemPolyElementNTTLayer( peSrc, 64, 2 ); +} + +// INTTFixupTimesRsqr = R^2 * 3303 = 
(3303<<32) mod Q +// 3303 constant is fixup from FIPS 203 +// Multiplied by R^2 to additionally multiply coefficients by R after montgomery reduction +const UINT32 SYMCRYPT_MLKEM_INTTFixupTimesRsqr = 1441; +const UINT32 SYMCRYPT_MLKEM_INTTFixupTimesRsqrTimesNegQInvModR = 10079; + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementINTTAndMulR( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc ) +{ + UINT32 i; + + SymCryptMlKemPolyElementINTTLayer( peSrc, 127, 2 ); + SymCryptMlKemPolyElementINTTLayer( peSrc, 63, 4 ); + SymCryptMlKemPolyElementINTTLayer( peSrc, 31, 8 ); + SymCryptMlKemPolyElementINTTLayer( peSrc, 15, 16 ); + SymCryptMlKemPolyElementINTTLayer( peSrc, 7, 32 ); + SymCryptMlKemPolyElementINTTLayer( peSrc, 3, 64 ); + SymCryptMlKemPolyElementINTTLayer( peSrc, 1, 128 ); + + for( i=0; i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++) + { + peSrc->coeffs[i] = (UINT16) SymCryptMlKemMontMul( + peSrc->coeffs[i], SYMCRYPT_MLKEM_INTTFixupTimesRsqr, SYMCRYPT_MLKEM_INTTFixupTimesRsqrTimesNegQInvModR ); + } +} + +// ((1<<33) / SYMCRYPT_MLKEM_Q) rounded to nearest integer +// +// 1<<33 is the smallest power of 2 s.t. the constant has sufficient precision to round +// all inputs correctly in compression for all nBitsPerCoefficient < 12. 
A smaller +// constant could be used for smaller nBitsPerCoefficient for a small performance gain +// +const UINT32 SYMCRYPT_MLKEM_COMPRESS_MULCONSTANT = 0x275f6f; +const UINT32 SYMCRYPT_MLKEM_COMPRESS_SHIFTCONSTANT = 33; + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementCompressAndEncode( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc, + UINT32 nBitsPerCoefficient, + _Out_writes_bytes_(nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8)) + PBYTE pbDst ) +{ + UINT32 i; + UINT64 multiplication; + UINT32 coefficient; + UINT32 nBitsInCoefficient; + UINT32 bitsToEncode; + UINT32 nBitsToEncode; + UINT32 cbDstWritten = 0; + UINT32 accumulator = 0; + UINT32 nBitsInAccumulator = 0; + + SYMCRYPT_ASSERT( nBitsPerCoefficient > 0 ); + SYMCRYPT_ASSERT( nBitsPerCoefficient <= 12 ); + + for( i=0; i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++ ) + { + nBitsInCoefficient = nBitsPerCoefficient; + coefficient = peSrc->coeffs[i]; // in range [0, Q-1] + SYMCRYPT_ASSERT( coefficient < SYMCRYPT_MLKEM_Q ); + + // first compress the coefficient + // when nBitsPerCoefficient < 12 we compress per Compress_d in FIPS 203; + if(nBitsPerCoefficient < 12) + { + // Multiply by 2^(nBitsPerCoefficient+1) / Q by multiplying by constant and shifting right + multiplication = SYMCRYPT_MUL32x32TO64(coefficient, SYMCRYPT_MLKEM_COMPRESS_MULCONSTANT); + coefficient = (UINT32) (multiplication >> (SYMCRYPT_MLKEM_COMPRESS_SHIFTCONSTANT-(nBitsPerCoefficient+1))); + + // add "half" to round to nearest integer + coefficient++; + + // final divide by two to get multiplication by 2^nBitsPerCoefficient / Q + coefficient >>= 1; // in range [0, 2^nBitsPerCoefficient] + SYMCRYPT_ASSERT(coefficient <= (1UL<<nBitsPerCoefficient)); + + // modular reduction by masking + coefficient &= (1UL<<nBitsPerCoefficient)-1; // in range [0, 2^nBitsPerCoefficient - 1] + SYMCRYPT_ASSERT(coefficient < (1UL<<nBitsPerCoefficient)); + } + + // encode the coefficient + // simple loop to add bits to accumulator and write accumulator to 
output + do + { + nBitsToEncode = SYMCRYPT_MIN(nBitsInCoefficient, 32-nBitsInAccumulator); + + bitsToEncode = coefficient & ((1UL<<nBitsToEncode)-1); + coefficient >>= nBitsToEncode; + nBitsInCoefficient -= nBitsToEncode; + + accumulator |= (bitsToEncode << nBitsInAccumulator); + nBitsInAccumulator += nBitsToEncode; + if(nBitsInAccumulator == 32) + { + SYMCRYPT_STORE_LSBFIRST32( pbDst+cbDstWritten, accumulator ); + cbDstWritten += 4; + accumulator = 0; + nBitsInAccumulator = 0; + } + } while( nBitsInCoefficient > 0 ); + } + + SYMCRYPT_ASSERT(nBitsInAccumulator == 0); + SYMCRYPT_ASSERT(cbDstWritten == (nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8))); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemPolyElementDecodeAndDecompress( + _In_reads_bytes_(nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8)) + PCBYTE pbSrc, + UINT32 nBitsPerCoefficient, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ) +{ + UINT32 i; + UINT32 coefficient; + UINT32 nBitsInCoefficient; + UINT32 bitsToDecode; + UINT32 nBitsToDecode; + UINT32 cbSrcRead = 0; + UINT32 accumulator = 0; + UINT32 nBitsInAccumulator = 0; + + SYMCRYPT_ASSERT( nBitsPerCoefficient > 0 ); + SYMCRYPT_ASSERT( nBitsPerCoefficient <= 12 ); + + for( i=0; i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i++ ) + { + coefficient = 0; + nBitsInCoefficient = 0; + + // first gather and decode bits from pbSrc + do + { + if(nBitsInAccumulator == 0) + { + accumulator = SYMCRYPT_LOAD_LSBFIRST32( pbSrc+cbSrcRead ); + cbSrcRead += 4; + nBitsInAccumulator = 32; + } + + nBitsToDecode = SYMCRYPT_MIN(nBitsPerCoefficient-nBitsInCoefficient, nBitsInAccumulator); + SYMCRYPT_ASSERT(nBitsToDecode <= nBitsInAccumulator); + + bitsToDecode = accumulator & ((1UL<<nBitsToDecode)-1); + accumulator >>= nBitsToDecode; + nBitsInAccumulator -= nBitsToDecode; + + coefficient |= (bitsToDecode << nBitsInCoefficient); + nBitsInCoefficient += nBitsToDecode; + } while( nBitsPerCoefficient > nBitsInCoefficient ); + 
SYMCRYPT_ASSERT(nBitsInCoefficient == nBitsPerCoefficient); + + // decompress the coefficient + // when nBitsPerCoefficient < 12 we decompress per Decompress_d in FIPS 203 + // otherwise we perform input validation per 203 6.2 Input validation 2 (Modulus check) + if(nBitsPerCoefficient < 12) + { + // Multiply by Q / 2^(nBitsPerCoefficient-1) by multiplying by constant and shifting right + coefficient *= SYMCRYPT_MLKEM_Q; + coefficient >>= (nBitsPerCoefficient-1); + + // add "half" to round to nearest integer + coefficient++; + + // final divide by two to get multiplication by Q / 2^nBitsPerCoefficient + coefficient >>= 1; // in range [0, Q] + + // modular reduction by conditional subtraction + coefficient = SymCryptMlKemModSub( coefficient, SYMCRYPT_MLKEM_Q ); + SYMCRYPT_ASSERT( coefficient < SYMCRYPT_MLKEM_Q ); + } + else if( coefficient >= SYMCRYPT_MLKEM_Q ) + { + // input validation failure - this can happen with a malformed or corrupt encapsulation + // or decapsulation key; we do not need to be constant time because we treat the + // validity of an imported key as public information. 
+ return SYMCRYPT_INVALID_BLOB; + } + + peDst->coeffs[i] = (UINT16) coefficient; + } + + SYMCRYPT_ASSERT(nBitsInAccumulator == 0); + SYMCRYPT_ASSERT(cbSrcRead == (nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8))); + + return SYMCRYPT_NO_ERROR; +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementSampleNTTFromShake128( + _Inout_ PSYMCRYPT_SHAKE128_STATE pState, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ) +{ + UINT32 i=0; + BYTE shakeOutputBuf[3*8]; // Keccak likes extracting multiples of 8-bytes + UINT32 currBufIndex = sizeof(shakeOutputBuf); + UINT16 sample0, sample1; + + while( i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS ) + { + SYMCRYPT_ASSERT(currBufIndex <= sizeof(shakeOutputBuf)); + if( currBufIndex == sizeof(shakeOutputBuf) ) + { + SymCryptShake128Extract(pState, shakeOutputBuf, sizeof(shakeOutputBuf), FALSE); + currBufIndex = 0; + } + + sample0 = SYMCRYPT_LOAD_LSBFIRST16( shakeOutputBuf+currBufIndex ) & 0xfff; + sample1 = SYMCRYPT_LOAD_LSBFIRST16( shakeOutputBuf+currBufIndex+1 ) >> 4; + currBufIndex += 3; + + peDst->coeffs[i] = sample0; + i += sample0 < SYMCRYPT_MLKEM_Q; + + if( i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS ) + { + peDst->coeffs[i] = sample1; + i += sample1 < SYMCRYPT_MLKEM_Q; + } + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementSampleCBDFromBytes( + _In_reads_bytes_(eta*2*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8) + 1) + PCBYTE pbSrc, + _In_range_(2,3) UINT32 eta, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ) +{ + UINT32 i, j; + UINT32 sampleBits; + UINT32 coefficient; + + SYMCRYPT_ASSERT((eta == 2) || (eta == 3)); + if( eta == 3 ) + { + for( i=0; i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i+=4 ) + { + // unconditionally load 4 bytes into sampleBits, but only treat the load + // as being 3 bytes (24-bits -> 4 coefficients) for eta==3 to align to + // byte boundaries. 
Source buffer must be 1 byte larger than shake output + sampleBits = SYMCRYPT_LOAD_LSBFIRST32( pbSrc ); + pbSrc += 3; + + // sum bit samples - each consecutive slice of eta bits is summed together + sampleBits = (sampleBits&0x249249) + ((sampleBits>>1)&0x249249) + ((sampleBits>>2)&0x249249); + + for( j=0; j<4; j++ ) + { + // each coefficient is formed by taking the difference of two consecutive slices of eta bits + // the first eta bits are positive, the second eta bits are negative + coefficient = sampleBits & 0x3f; + sampleBits >>= 6; + coefficient = (coefficient&3) - (coefficient>>3); + SYMCRYPT_ASSERT((coefficient >= ((UINT32)-3)) || (coefficient <= 3)); + + coefficient = coefficient + (SYMCRYPT_MLKEM_Q & (coefficient >> 16)); // in range [0, Q-1] + SYMCRYPT_ASSERT( coefficient < SYMCRYPT_MLKEM_Q ); + + peDst->coeffs[i+j] = (UINT16) coefficient; + } + } + } + else + { + for( i=0; i<SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS; i+=8 ) + { + // unconditionally load 4 bytes (32-bits -> 8 coefficients) into sampleBits + sampleBits = SYMCRYPT_LOAD_LSBFIRST32( pbSrc ); + pbSrc += 4; + + // sum bit samples - each consecutive slice of eta bits is summed together + sampleBits = (sampleBits&0x55555555) + ((sampleBits>>1)&0x55555555); + + for( j=0; j<8; j++ ) + { + // each coefficient is formed by taking the difference of two consecutive slices of eta bits + // the first eta bits are positive, the second eta bits are negative + coefficient = sampleBits & 0xf; + sampleBits >>= 4; + coefficient = (coefficient&3) - (coefficient>>2); + SYMCRYPT_ASSERT((coefficient >= ((UINT32)-2)) || (coefficient <= 2)); + + coefficient = coefficient + (SYMCRYPT_MLKEM_Q & (coefficient >> 16)); // in range [0, Q-1] + SYMCRYPT_ASSERT( coefficient < SYMCRYPT_MLKEM_Q ); + + peDst->coeffs[i+j] = (UINT16) coefficient; + } + } + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemMatrixTranspose( + _Inout_ PSYMCRYPT_MLKEM_MATRIX pmSrc ) +{ + UINT32 i, j; + PSYMCRYPT_MLKEM_POLYELEMENT swap; + const UINT32 nRows = 
pmSrc->nRows; + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + + for( i=0; i<nRows; i++ ) + { + for( j=i+1; j<nRows; j++ ) + { + swap = pmSrc->apPolyElements[(i*nRows) + j]; + pmSrc->apPolyElements[(i*nRows) + j] = pmSrc->apPolyElements[(j*nRows) + i]; + pmSrc->apPolyElements[(j*nRows) + i] = swap; + } + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemMatrixVectorMontMulAndAdd( + _In_ PCSYMCRYPT_MLKEM_MATRIX pmSrc1, + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc2, + _Inout_ PSYMCRYPT_MLKEM_VECTOR pvDst, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paTmp ) +{ + UINT32 i, j; + const UINT32 nRows = pmSrc1->nRows; + PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, peSrc2; + PSYMCRYPT_MLKEM_POLYELEMENT peDst; + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + SYMCRYPT_ASSERT( pvSrc2->nRows == nRows ); + SYMCRYPT_ASSERT( pvDst->nRows == nRows ); + + // Zero paTmp + SymCryptWipeKnownSize( paTmp, SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT_ACCUMULATOR ); + + for( i=0; i<nRows; i++ ) + { + for( j=0; j<nRows; j++ ) + { + peSrc1 = pmSrc1->apPolyElements[(i*nRows) + j]; + peSrc2 = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( j, pvSrc2 ); + SymCryptMlKemPolyElementMulAndAccumulate( peSrc1, peSrc2, paTmp ); + } + + // write accumulator to dest and zero accumulator + peDst = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvDst ); + SymCryptMlKemMontgomeryReduceAndAddPolyElementAccumulatorToPolyElement( paTmp, peDst ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorMontDotProduct( + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc1, + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc2, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peDst, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paTmp ) +{ + UINT32 i; + const UINT32 nRows = pvSrc1->nRows; + PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, peSrc2; + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + SYMCRYPT_ASSERT( pvSrc2->nRows == nRows ); + + // Zero 
paTmp and peDst + SymCryptWipeKnownSize( paTmp, SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT_ACCUMULATOR ); + SymCryptWipeKnownSize( peDst, SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT ); + + for( i=0; i<nRows; i++ ) + { + peSrc1 = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc1 ); + peSrc2 = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc2 ); + SymCryptMlKemPolyElementMulAndAccumulate( peSrc1, peSrc2, paTmp ); + } + + // write accumulator to dest and zero accumulator + SymCryptMlKemMontgomeryReduceAndAddPolyElementAccumulatorToPolyElement( paTmp, peDst ); +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorSetZero( + _Inout_ PSYMCRYPT_MLKEM_VECTOR pvSrc ) +{ + const UINT32 nRows = pvSrc->nRows; + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + + SymCryptWipe( (PBYTE) SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( 0, pvSrc ), nRows*SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT ); +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorMulR( + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc, + _Out_ PSYMCRYPT_MLKEM_VECTOR pvDst ) +{ + UINT32 i; + const UINT32 nRows = pvSrc->nRows; + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + SYMCRYPT_ASSERT( pvDst->nRows == nRows ); + + for( i=0; i<nRows; i++ ) + { + SymCryptMlKemPolyElementMulR( + SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc ), + SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvDst ) ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorAdd( + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc1, + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc2, + _Out_ PSYMCRYPT_MLKEM_VECTOR pvDst ) +{ + UINT32 i; + const UINT32 nRows = pvSrc1->nRows; + PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, peSrc2; + PSYMCRYPT_MLKEM_POLYELEMENT peDst; + + SYMCRYPT_ASSERT( nRows > 0 ); + SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ); + SYMCRYPT_ASSERT( pvSrc2->nRows == nRows ); + SYMCRYPT_ASSERT( pvDst->nRows == nRows ); + + for( i=0; i<nRows; i++ ) + { + peSrc1 = 
SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc1 );
+        peSrc2 = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc2 );
+        peDst = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvDst );
+        SymCryptMlKemPolyElementAdd( peSrc1, peSrc2, peDst );
+    }
+}
+
+VOID
+SYMCRYPT_CALL
+SymCryptMlKemVectorSub(    // For every row i: SymCryptMlKemPolyElementSub( pvSrc1[i], pvSrc2[i], pvDst[i] )
+    _In_    PCSYMCRYPT_MLKEM_VECTOR pvSrc1,
+    _In_    PCSYMCRYPT_MLKEM_VECTOR pvSrc2,
+    _Out_   PSYMCRYPT_MLKEM_VECTOR  pvDst )
+{
+    UINT32 i;
+    const UINT32 nRows = pvSrc1->nRows;    // pvSrc1 defines the row count; the other two must match (asserted below)
+    PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, peSrc2;
+    PSYMCRYPT_MLKEM_POLYELEMENT peDst;
+
+    SYMCRYPT_ASSERT( nRows > 0 );
+    SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS );
+    SYMCRYPT_ASSERT( pvSrc2->nRows == nRows );
+    SYMCRYPT_ASSERT( pvDst->nRows == nRows );
+
+    for( i=0; i<nRows; i++ )
+    {
+        peSrc1 = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc1 );
+        peSrc2 = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc2 );
+        peDst = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvDst );
+        SymCryptMlKemPolyElementSub( peSrc1, peSrc2, peDst );
+    }
+}
+
+VOID
+SYMCRYPT_CALL
+SymCryptMlKemVectorNTT(    // Applies SymCryptMlKemPolyElementNTT in place to every row of pvSrc
+    _Inout_ PSYMCRYPT_MLKEM_VECTOR pvSrc )
+{
+    UINT32 i;
+    const UINT32 nRows = pvSrc->nRows;
+
+    SYMCRYPT_ASSERT( nRows > 0 );
+    SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS );
+
+    for( i=0; i<nRows; i++ )
+    {
+        SymCryptMlKemPolyElementNTT( SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc ) );
+    }
+}
+
+VOID
+SYMCRYPT_CALL
+SymCryptMlKemVectorINTTAndMulR(    // Applies SymCryptMlKemPolyElementINTTAndMulR in place to every row of pvSrc
+    _Inout_ PSYMCRYPT_MLKEM_VECTOR pvSrc )
+{
+    UINT32 i;
+    const UINT32 nRows = pvSrc->nRows;
+
+    SYMCRYPT_ASSERT( nRows > 0 );
+    SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS );
+
+    for( i=0; i<nRows; i++ )
+    {
+        SymCryptMlKemPolyElementINTTAndMulR( SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc ) );
+    }
+}
+
+VOID
+SYMCRYPT_CALL
+SymCryptMlKemVectorCompressAndEncode(    // Encodes the rows of pvSrc one after another into pbDst
+    _In_                        PCSYMCRYPT_MLKEM_VECTOR pvSrc,
+                                UINT32                  nBitsPerCoefficient,
+    _Out_writes_bytes_(cbDst)   PBYTE                   pbDst,
+                                SIZE_T                  cbDst )
+{
+    UINT32 i;
+    const UINT32 nRows = pvSrc->nRows;
+    PCSYMCRYPT_MLKEM_POLYELEMENT peSrc;
+
+    SYMCRYPT_ASSERT( nRows > 0 );
+    SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS );
+    SYMCRYPT_ASSERT( nBitsPerCoefficient > 0 );
+    SYMCRYPT_ASSERT( nBitsPerCoefficient <= 12 );
+    SYMCRYPT_ASSERT( cbDst == nRows*nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8) );
+
+    UNREFERENCED_PARAMETER( cbDst );    // cbDst is only read by the assert above
+
+    for( i=0; i<nRows; i++ )
+    {
+        peSrc = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvSrc );
+        SymCryptMlKemPolyElementCompressAndEncode( peSrc, nBitsPerCoefficient, pbDst );
+        pbDst += nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8);    // per-row stride; nRows strides equal cbDst per the assert above
+    }
+}
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptMlKemVectorDecodeAndDecompress(    // Decodes consecutive chunks of pbSrc into the rows of pvDst; returns the first error
+    _In_reads_bytes_(cbSrc) PCBYTE                  pbSrc,
+                            SIZE_T                  cbSrc,
+                            UINT32                  nBitsPerCoefficient,
+    _Out_                   PSYMCRYPT_MLKEM_VECTOR  pvDst )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+    UINT32 i;
+    const UINT32 nRows = pvDst->nRows;
+    PSYMCRYPT_MLKEM_POLYELEMENT peDst;
+
+    SYMCRYPT_ASSERT( nRows > 0 );
+    SYMCRYPT_ASSERT( nRows <= SYMCRYPT_MLKEM_MATRIX_MAX_NROWS );
+    SYMCRYPT_ASSERT( nBitsPerCoefficient > 0 );
+    SYMCRYPT_ASSERT( nBitsPerCoefficient <= 12 );
+    SYMCRYPT_ASSERT( cbSrc == nRows*nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8) );
+
+    UNREFERENCED_PARAMETER( cbSrc );    // cbSrc is only read by the assert above
+
+    for( i=0; i<nRows; i++ )
+    {
+        peDst = SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( i, pvDst );
+        scError = SymCryptMlKemPolyElementDecodeAndDecompress( pbSrc, nBitsPerCoefficient, peDst );
+        if( scError != SYMCRYPT_NO_ERROR )
+        {
+            goto cleanup;    // stop at the first row that fails; later rows of pvDst are not written by this call
+        }
+        pbSrc += nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8);
+    }
+
+cleanup:
+    return scError;
+}
+
+VOID
+SYMCRYPT_CALL
+SymCryptMlKemkeyWipePrivateState(    // Zeroes pvs, wipes privateRandom and privateSeed, then clears both "has private" flags
+    _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey )
+{
+    SymCryptMlKemVectorSetZero( pkMlKemkey->pvs );
+    SymCryptWipeKnownSize( pkMlKemkey->privateRandom, sizeof(pkMlKemkey->privateRandom) );
+    SymCryptWipeKnownSize( pkMlKemkey->privateSeed, sizeof(pkMlKemkey->privateSeed) );
+    pkMlKemkey->hasPrivateKey = FALSE;
+    pkMlKemkey->hasPrivateSeed = FALSE;
+}
+
+#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86
+#ifdef __clang__
+#pragma clang attribute pop
+#else
+#pragma GCC pop_options
+#endif
+#endif
diff --git a/libs/symcrypt/lib/modexp.c b/libs/symcrypt/lib/modexp.c
new file mode 100644
index 00000000000..674200cd640
--- /dev/null
+++ b/libs/symcrypt/lib/modexp.c
@@ -0,0 +1,510 @@
+//
+// modexp.c   Modular exponentiation functions
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+
+//
+// The windowed modular exponentiation algorithm works by generating a
+// side-channel table of all the powers of the base from 0 up to 2^W - 1
+// where W is the window size:
+//      scsPrecomp = { 1, base, base^2, ..., base^(2^W-1) }
+//
+// TODO: To mitigate power analysis attacks when multiplying by 1 (which might
+//       contain a lot of zeros in non-Montgomery moduli), future work is to
+//       get rid of the 1 in the table. The leak is limited since now we always
+//       have Montgomery moduli.
+//
+// Then it slices the exponent into chunks of W bits and goes through
+// each chunk of the exponent starting from the most significant
+// chunk. For each chunk c_i it squares a temporary modelement
+// W times and then multiplies it by scsPrecomp[c_i]. The starting
+// value of the temporary modelement is scsPrecomp[c_0] i.e. the one
+// corresponding to the most significant chunk.
+//
+// Denote by M and SQ the multiplications and squarings and by B = nBitsExp
+// the number of bits of the exponent. Then the algorithm does
+//      (2^W - 2)*M + (B-1)/W*(W*SQ + M) =
+//      (2^W + (B-1)/W -2) multiplications and (B-1) squarings
+//
+// It is beneficial to change the window size from W to W+1 when
+//      2^(W+1) + (B-1)/(W+1) < 2^W + (B-1)/W =>
+//      B > 2^W*W(W+1)+1
+// A simple table that calculates the optimal values for the window size
+// is shown below.
+//
+// The minimum value of W is W=4 as 2^W should be a multiple
+// of the groupsize of the scsTable, which is 4 by default.
+
+#define MIN_WINDOW_SIZE     (4)
+
+static const UINT32 cutoffs[] =
+{
+    // 5,       // W should be 2 for 5 < B <= 25
+    // 25,      // W should be 3 for 25 < B <= 97
+    // 97,      // W should be 4 for 97 < B <= 321
+    321,        // W should be 5 for 321 < B <= 961
+    // 961,     // W should be 6 for 961 < B (disabled, so 5 is the largest W the loop below can pick)
+};
+
+static const UINT32 nCuttoffs = sizeof(cutoffs) / sizeof(cutoffs[0]);    // NOTE(review): identifier is spelled "nCuttoffs" (sic)
+
+VOID
+SYMCRYPT_CALL
+SymCryptModExpWindowed(    // peDst = peBase^piExp mod pmMod, fixed-window method using the low nBitsExp bits of piExp
+    _In_                            PCSYMCRYPT_MODULUS      pmMod,
+    _In_                            PCSYMCRYPT_MODELEMENT   peBase,
+    _In_                            PCSYMCRYPT_INT          piExp,
+                                    UINT32                  nBitsExp,
+    _Out_                           PSYMCRYPT_MODELEMENT    peDst,
+    _Out_writes_bytes_( cbScratch ) PBYTE                   pbScratch,
+                                    SIZE_T                  cbScratch )
+{
+    UINT32 W = 0;
+    UINT32 nTableElements = 0;
+
+    SYMCRYPT_SCSTABLE scsPrecomp = { 0 };
+    UINT32 cbScsPrecomp = 0;
+
+    UINT32 cbModElement = SymCryptSizeofModElementFromModulus( pmMod );
+
+    PSYMCRYPT_MODELEMENT peT1 = NULL;
+    PSYMCRYPT_MODELEMENT peT2 = NULL;
+
+    UINT32 nIterations = 0;
+    UINT32 iBit = 0;
+    UINT32 nBits = 0;
+    UINT32 index = 0;
+
+    // Truncate the nBitsExp if above the object size
+    nBitsExp = SYMCRYPT_MIN( nBitsExp, SymCryptIntBitsizeOfObject(piExp) );
+
+    // Calculate the window size (with the single active cutoff, W ends up as 4 or 5)
+    W = MIN_WINDOW_SIZE;
+    while ((W-MIN_WINDOW_SIZE < nCuttoffs) && (cutoffs[W-MIN_WINDOW_SIZE]<nBitsExp))
+    {
+        W++;
+    }
+    nTableElements = (1<<W);
+
+    // Initialize the table of temporary modelements
+    cbScsPrecomp = SymCryptScsTableInit( &scsPrecomp, nTableElements, cbModElement );
+
+    SYMCRYPT_ASSERT( cbScratch >= cbScsPrecomp + 2*cbModElement + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pmMod->nDigits ) );
+
+    SymCryptScsTableSetBuffer( &scsPrecomp, pbScratch, cbScsPrecomp );
+    pbScratch += cbScsPrecomp;
+    cbScratch -= cbScsPrecomp;
+
+    // Create the temporary modelements peT1 and peT2 from the scratch space that follows the table
+    peT1 = SymCryptModElementCreate( pbScratch, cbModElement, pmMod );
+    SYMCRYPT_ASSERT( peT1 != NULL );
+    pbScratch += cbModElement;
+    cbScratch -= cbModElement;
+    peT2 = SymCryptModElementCreate( pbScratch, cbModElement, pmMod );
+    SYMCRYPT_ASSERT( peT2 != NULL );
+    pbScratch += cbModElement;
+    cbScratch -= cbModElement;
+
+    // Fill the first element with 1 (**note: this will cause 0^0 = 1)
+    // and the second with peBase
+    SYMCRYPT_ASSERT( nTableElements >= 2 );
+
+    SymCryptModElementSetValueUint32( 1, pmMod, peT1, pbScratch, cbScratch );
+    SymCryptScsTableStore( &scsPrecomp, 0, (PBYTE)peT1, cbModElement );
+
+    SymCryptModElementCopy( pmMod, peBase, peT1 );
+    SymCryptScsTableStore( &scsPrecomp, 1, (PBYTE)peT1, cbModElement );
+
+    // Fill the table with the powers of peBase (entry i holds peBase^i)
+    for (UINT32 i=2; i<nTableElements; i++)
+    {
+        // TODO: Future improvement, use squarings for this table.
+        SymCryptModMul( pmMod, peT1, peBase, peT1, pbScratch, cbScratch );
+        SymCryptScsTableStore( &scsPrecomp, i, (PBYTE)peT1, cbModElement );
+    }
+
+    // Find the number of iterations (minus one) and the starting position bit
+    SYMCRYPT_ASSERT( nBitsExp != 0 );    // NOTE(review): with nBitsExp == 0 the UINT32 (nBitsExp - 1) below wraps around
+    nIterations = (nBitsExp - 1) / W;
+    iBit = nIterations * W;
+
+    // Do the first chunk (it might be smaller than W bits)
+    nBits = nBitsExp - iBit;
+    index = SymCryptIntGetBits( piExp, iBit, nBits );
+    SymCryptScsTableLoad( &scsPrecomp, index, (PBYTE)peT1, cbModElement );
+
+    // Work in batches of W bits in the exponent
+    for (UINT32 i=0; i<nIterations; i++)
+    {
+        // Square W times
+        for (UINT32 j=0; j<W; j++)
+        {
+            SymCryptModSquare( pmMod, peT1, peT1, pbScratch, cbScratch );
+        }
+
+        iBit -= W;
+        index = SymCryptIntGetBits( piExp, iBit, W );
+        SymCryptScsTableLoad( &scsPrecomp, index, (PBYTE)peT2, cbModElement );
+
+        SymCryptModMul( pmMod, peT1, peT2, peT1, pbScratch, cbScratch );
+    }
+
+    SYMCRYPT_ASSERT( iBit == 0 );
+
+    SymCryptModElementCopy( pmMod, peT1, peDst );
+}
+
+VOID
+SYMCRYPT_CALL
+SymCryptModExpSquareAndMultiply32(    // peDst = peBase^e mod pmMod where e is the UINT32 read from piExp; branches on the bits of e
+    _In_                            PCSYMCRYPT_MODULUS      pmMod,
+    _In_                            PCSYMCRYPT_MODELEMENT   peBase,
+    _In_                            PCSYMCRYPT_INT          piExp,
+    _Out_                           PSYMCRYPT_MODELEMENT    peDst,
+    _Out_writes_bytes_( cbScratch ) PBYTE                   pbScratch,
+                                    SIZE_T                  cbScratch )
+{
+    UINT32 cbModElement = SymCryptSizeofModElementFromModulus( pmMod );
+
+    PSYMCRYPT_MODELEMENT peT1 = NULL;
+    PSYMCRYPT_MODELEMENT peT2 = NULL;
+
+    // The exponent has at most 32 bits when this function is reached through
+    // SymCryptModExpGeneric (it checks nBitsExp <= sizeof(UINT32)*8), so one UINT32 holds it.
+    UINT32 exp = SymCryptIntGetValueLsbits32( piExp );
+
+    SYMCRYPT_ASSERT( cbScratch >= 2*cbModElement + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pmMod->nDigits ) );
+
+    // Create the temporary modelements
+    peT1 = SymCryptModElementCreate( pbScratch, cbModElement, pmMod );
+    SYMCRYPT_ASSERT( peT1 != NULL );
+    pbScratch += cbModElement;
+    cbScratch -= cbModElement;
+    peT2 = SymCryptModElementCreate( pbScratch, cbModElement, pmMod );
+    SYMCRYPT_ASSERT( peT2 != NULL );
+    pbScratch += cbModElement;
+    cbScratch -= cbModElement;
+
+    if (exp == 0)
+    {
+        SymCryptModElementSetValueUint32( 1, pmMod, peDst, pbScratch, cbScratch );    // anything to the power 0 is set to 1
+    }
+    else
+    {
+        SymCryptModElementSetValueUint32( 1, pmMod, peT1, pbScratch, cbScratch );    // peT1: running product, starts at 1
+        SymCryptModElementCopy( pmMod, peBase, peT2 );                                // peT2: running square, starts at peBase
+
+        while (exp>1)    // consume exp from the least significant bit; the top bit is handled after the loop
+        {
+            if (exp%2 == 1)
+            {
+                SymCryptModMul( pmMod, peT1, peT2, peT1, pbScratch, cbScratch );
+            }
+
+            SymCryptModSquare( pmMod, peT2, peT2, pbScratch, cbScratch );
+            exp /= 2;
+        }
+
+        SymCryptModMul( pmMod, peT1, peT2, peDst, pbScratch, cbScratch );
+    }
+}
+
+VOID
+SYMCRYPT_CALL
+SymCryptModExpGeneric(    // Dispatcher: square-and-multiply only for public data with nBitsExp <= 32, windowed otherwise
+    _In_                            PCSYMCRYPT_MODULUS      pmMod,
+    _In_                            PCSYMCRYPT_MODELEMENT   peBase,
+    _In_                            PCSYMCRYPT_INT          piExp,
+                                    UINT32                  nBitsExp,
+                                    UINT32                  flags,
+    _Out_                           PSYMCRYPT_MODELEMENT    peDst,
+    _Out_writes_bytes_( cbScratch ) PBYTE                   pbScratch,
+                                    SIZE_T                  cbScratch )
+{
+    if ( ((flags & SYMCRYPT_FLAG_DATA_PUBLIC)!=0) && (nBitsExp <= sizeof(UINT32)*8) )
+    {
+        SymCryptModExpSquareAndMultiply32( pmMod, peBase, piExp, peDst, pbScratch, cbScratch );
+    }
+    else
+    {
+        SymCryptModExpWindowed( pmMod, peBase, piExp, nBitsExp, peDst, pbScratch, cbScratch );       // This is the default
+    }
+}
+
+//
+// MultiExponentiation
+//
+
+// SYMCRYPT_MODMULTIEXP_MAX_NPRECOMP: upper bound on the number of precomputed powers
+// of one base point that the multi-exponentiation operation may use.
+// It should be equal to 2^(SYMCRYPT_FDEF_MAX_WINDOW_MODEXP-1)
+#define SYMCRYPT_MODMULTIEXP_MAX_NPRECOMP      (1<<(SYMCRYPT_FDEF_MAX_WINDOW_MODEXP-1))
+
+// SYMCRYPT_MODMULTIEXP_WINDOW_SIZE: the fixed window size used by the
+// WnafWithInterleaving implementation. It is found to give the faster running
+// times for sizes 512 - 2048 bits.
+#define SYMCRYPT_MODMULTIEXP_WINDOW_SIZE       (5)
+
+C_ASSERT( SYMCRYPT_MODMULTIEXP_MAX_NPRECOMP >= (1 << (SYMCRYPT_MODMULTIEXP_WINDOW_SIZE-1)) );
+
+//
+// Fills pePIs[1], ..., pePIs[nPrecomputedPowers-1] with odd powers of the base
+// point B: each entry is the previous entry multiplied by B^2 (kept in peTemp).
+//
+// pePIs[0] must be filled by the caller before the call.
+VOID
+SYMCRYPT_CALL
+SymCryptModExpPrecomputation(
+    _In_    PCSYMCRYPT_MODULUS      pmP,
+            UINT32                  nPrecomputedPowers,
+    _In_reads_( SYMCRYPT_MODMULTIEXP_MAX_NPRECOMP )
+            PSYMCRYPT_MODELEMENT *  pePIs,
+            PSYMCRYPT_MODELEMENT    peTemp,
+    _Out_writes_bytes_opt_( cbScratch )
+            PBYTE                   pbScratch,
+            SIZE_T                  cbScratch
+)
+{
+    UINT32 iNext = 1;   // next table slot to fill; slot 0 belongs to the caller
+    SYMCRYPT_ASSERT(nPrecomputedPowers>=2);
+
+    // peTemp = B^2, the constant factor between consecutive table entries
+    SymCryptModSquare( pmP, pePIs[0], peTemp, pbScratch, cbScratch );
+    while (iNext < nPrecomputedPowers)
+    {
+        // B[iNext] = B^2*B[iNext-1]
+        SymCryptModMul( pmP, peTemp, pePIs[iNext-1], pePIs[iNext], pbScratch, cbScratch );
+        iNext++;
+    }
+}
+
+//
+// The following is a similar algorithm to SymCryptEcpointMultiScalarMulWnafWithInterleaving.
+// It is a NON SIDE-CHANNEL SAFE algorithm.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptModMultiExpWnafWithInterleaving(    // peDst = product over j of peBaseArray[j]^piExpArray[j] mod pmMod (public data only)
+    _In_                            PCSYMCRYPT_MODULUS      pmMod,
+    _In_reads_( nBases )            PCSYMCRYPT_MODELEMENT * peBaseArray,
+    _In_reads_( nBases )            PCSYMCRYPT_INT *        piExpArray,
+                                    UINT32                  nBases,
+                                    UINT32                  nBitsExp,
+    _Out_                           PSYMCRYPT_MODELEMENT    peDst,
+    _Out_writes_bytes_( cbScratch ) PBYTE                   pbScratch,
+                                    SIZE_T                  cbScratch )
+{
+    UINT32 i, j;
+
+    UINT32 w = 0;
+    UINT32 nPrecompPoints = 0;
+    UINT32 nRecodedDigits = 0;
+
+    // Masks
+    UINT32 fOne[SYMCRYPT_MODMULTIEXP_MAX_NBASES] = { 0 };
+    UINT32 fOneTot = 0xffffffff;    // Final result 1
+
+    UINT32 fZeroExp = 0;            // Zero exponent
+    UINT32 fZeroTot = 0;            // Final result 0
+
+    UINT32 cbModElement = SymCryptSizeofModElementFromModulus( pmMod );
+
+    // ====================================================
+    // Temporaries
+    PSYMCRYPT_MODELEMENT pePIs[SYMCRYPT_MODMULTIEXP_MAX_NBASES*SYMCRYPT_MODMULTIEXP_MAX_NPRECOMP] = { 0 };
+    PSYMCRYPT_MODELEMENT peTemp = NULL;
+    PSYMCRYPT_MODELEMENT peOne = NULL;
+
+    PUINT32 absofKIs = NULL;
+    // ===================================================
+
+    // Calculate the window size
+    w = SYMCRYPT_MODMULTIEXP_WINDOW_SIZE;
+    nPrecompPoints = (1 << (w-1));      // We only store odd powers of the base point
+
+    // Number of recoded digits
+    nRecodedDigits = nBitsExp;
+
+    //
+    // From symcrypt_internal.h we have:
+    //      - sizeof results are upper bounded by 2^19
+    //      - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE)
+    //      - nBases, nRecodedDigits, and nPrecompPoints are bounded by SYMCRYPT_MODMULTIEXP_MAX_NBASES,
+    //        SYMCRYPT_MODMULTIEXP_MAX_NBITSEXP, and SYMCRYPT_MODMULTIEXP_MAX_NPRECOMP, respectively.
+    // Thus the following calculation does not overflow cbScratch.
+    //
+    SYMCRYPT_ASSERT( SYMCRYPT_MODMULTIEXP_MAX_NBASES >= nBases );
+    SYMCRYPT_ASSERT( SYMCRYPT_MODMULTIEXP_MAX_NPRECOMP >= nPrecompPoints );
+
+    // Creating temporary precomputed modelements (nPrecompPoints consecutive slots per base)
+    for (i=0; i<nBases*nPrecompPoints; i++)
+    {
+        SYMCRYPT_ASSERT( cbScratch >= cbModElement );
+        pePIs[i] = SymCryptModElementCreate( pbScratch, cbModElement, pmMod );
+        SYMCRYPT_ASSERT( pePIs[i] != NULL );
+        pbScratch += cbModElement;
+        cbScratch -= cbModElement;
+    }
+
+    SYMCRYPT_ASSERT( cbScratch >=
+                        2*cbModElement +
+                        ((nBases*nRecodedDigits*sizeof(UINT32) + SYMCRYPT_ASYM_ALIGN_VALUE - 1)/SYMCRYPT_ASYM_ALIGN_VALUE)*SYMCRYPT_ASYM_ALIGN_VALUE +
+                        SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( SymCryptModulusDigitsizeOfObject( pmMod ) ) );
+
+    // Creating the temporary modelements peTemp and peOne
+    peTemp = SymCryptModElementCreate( pbScratch, cbModElement, pmMod );
+    SYMCRYPT_ASSERT( peTemp != NULL );
+    pbScratch += cbModElement;
+    cbScratch -= cbModElement;
+
+    peOne = SymCryptModElementCreate( pbScratch, cbModElement, pmMod );
+    SYMCRYPT_ASSERT( peOne != NULL );
+    pbScratch += cbModElement;
+    cbScratch -= cbModElement;
+
+    // Fixing pointers to recoded digits (be careful that the remaining space is SYMCRYPT_ASYM_ALIGNed)
+    absofKIs = (PUINT32) pbScratch;
+    pbScratch += nBases * nRecodedDigits * sizeof(UINT32);
+    cbScratch -= nBases * nRecodedDigits * sizeof(UINT32);
+
+    // Update cbScratch first using pbScratch, as the amount of scratch skipped for alignment depends upon the alignment of pbScratch
+    cbScratch -= ( ((SIZE_T)pbScratch + SYMCRYPT_ASYM_ALIGN_VALUE - 1) & ~(SYMCRYPT_ASYM_ALIGN_VALUE - 1) ) - (SIZE_T)pbScratch;
+    pbScratch = (PBYTE) ( ((SIZE_T)pbScratch + SYMCRYPT_ASYM_ALIGN_VALUE - 1) & ~(SYMCRYPT_ASYM_ALIGN_VALUE - 1) );
+
+
+    //
+    // Main algorithm
+    //
+
+    // Set peOne to 1
+    SymCryptModElementSetValueUint32( 1, pmMod, peOne, pbScratch, cbScratch );
+
+    // Zero-out all recoded digits (bases skipped below keep all-zero digits and so never multiply in the main loop)
+    SymCryptWipe( (PBYTE)absofKIs, nBases*nRecodedDigits*sizeof(UINT32) );
+
+    for (j = 0; j<nBases; j++)
+    {
+        // Check if the exponent is zero
+        fZeroExp = SymCryptIntIsEqualUint32( piExpArray[j], 0 );
+
+        // Check if the result is 0 (i.e. 0^e with e!=0)
+        if( !fZeroExp && SymCryptModElementIsZero(pmMod, peBaseArray[j]) )
+        {
+            fZeroTot = 0xffffffff;
+            break;
+        }
+
+        // Check if the exponent is 0 or if the base point is 1
+        fOne[j] = ( fZeroExp | SymCryptModElementIsEqual( pmMod, peBaseArray[j], peOne ) );
+        fOneTot &= fOne[j];
+
+        // Skip the recoding stage (and all remaining steps) if this point will give result 1
+        if (!fOne[j])
+        {
+            // Recoding stage
+            SymCryptPositiveWidthNafRecoding( w, piExpArray[j], nBitsExp, &absofKIs[j*nRecodedDigits], nRecodedDigits );
+
+            // Copy the base in the start of the pePIs array
+            SymCryptModElementCopy( pmMod, peBaseArray[j], pePIs[j*nPrecompPoints] );
+
+            // Precomputation stage
+            SymCryptModExpPrecomputation( pmMod, nPrecompPoints, &pePIs[j*nPrecompPoints], peTemp, pbScratch, cbScratch );
+        }
+    }
+
+    if (fZeroTot)
+    {
+        SymCryptModElementSetValueUint32( 0, pmMod, peDst, pbScratch, cbScratch );
+    }
+    else
+    {
+        SymCryptModElementSetValueUint32( 1, pmMod, peTemp, pbScratch, cbScratch );
+
+        if (!fOneTot)
+        {
+            // Main loop: one squaring per digit position, from the last recoded digit down to digit 0
+            for (INT32 i = nRecodedDigits-1; i>-1; i--)    // NOTE(review): this INT32 i shadows the function-level UINT32 i
+            {
+                SymCryptModSquare( pmMod, peTemp, peTemp, pbScratch, cbScratch );
+
+                for (j = 0; j<nBases; j++)
+                {
+                    if (absofKIs[j*nRecodedDigits + i] != 0)    // a nonzero digit d of base j selects table entry d/2 of that base
+                    {
+                        SymCryptModMul( pmMod, peTemp, pePIs[j*nPrecompPoints + absofKIs[j*nRecodedDigits + i]/2], peTemp, pbScratch, cbScratch );
+                    }
+                }
+            }
+        }
+
+        // Copy the result into the destination
+        SymCryptModElementCopy( pmMod, peTemp, peDst );
+    }
+}
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptModMultiExpGeneric(    // Validates nBases/nBitsExp, then uses wNAF for public data or one windowed ModExp per base otherwise
+    _In_                            PCSYMCRYPT_MODULUS      pmMod,
+    _In_reads_( nBases )            PCSYMCRYPT_MODELEMENT * peBaseArray,
+    _In_reads_( nBases )            PCSYMCRYPT_INT *        piExpArray,
+                                    UINT32                  nBases,
+                                    UINT32                  nBitsExp,
+                                    UINT32                  flags,
+    _Out_                           PSYMCRYPT_MODELEMENT    peDst,
+    _Out_writes_bytes_( cbScratch ) PBYTE                   pbScratch,
+                                    SIZE_T                  cbScratch )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    if ( (nBases > SYMCRYPT_MODMULTIEXP_MAX_NBASES) ||
+         (nBitsExp > SYMCRYPT_MODMULTIEXP_MAX_NBITSEXP) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;    // peDst is left untouched on this path
+        goto cleanup;
+    }
+
+    if ((flags & SYMCRYPT_FLAG_DATA_PUBLIC)!=0)
+    {
+        SymCryptModMultiExpWnafWithInterleaving( pmMod, peBaseArray, piExpArray, nBases, nBitsExp, peDst, pbScratch, cbScratch );
+    }
+    else
+    {
+        UINT32 cbModElement = 0;
+        PSYMCRYPT_MODELEMENT peTemp = NULL;
+        PSYMCRYPT_MODELEMENT peAcc = NULL;
+
+        // Use two temporary modelements to store the results
+        // *** Make sure that the scratch space is enough i.e. the scratch space of ModMultiExp is
+        // at least 2 modelements bigger than the scratch space of ModExp
+        cbModElement = SymCryptSizeofModElementFromModulus( pmMod );
+
+        SYMCRYPT_ASSERT( SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP(SymCryptModulusDigitsizeOfObject(pmMod)) + 2*cbModElement <=
+                         SYMCRYPT_SCRATCH_BYTES_FOR_MODMULTIEXP( SymCryptModulusDigitsizeOfObject(pmMod), nBases, nBitsExp ) );
+        SYMCRYPT_ASSERT( cbScratch >= 2*cbModElement + SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP(SymCryptModulusDigitsizeOfObject(pmMod)) );
+
+        peTemp = SymCryptModElementCreate( pbScratch, cbModElement, pmMod );    // NOTE(review): not asserted non-NULL, unlike the other functions in this file
+        pbScratch += cbModElement; cbScratch -= cbModElement;
+
+        peAcc = SymCryptModElementCreate( pbScratch, cbModElement, pmMod );     // NOTE(review): same, no NULL assert here
+        pbScratch += cbModElement; cbScratch -= cbModElement;
+
+        // Set peAcc to 1
+        SymCryptModElementSetValueUint32( 1, pmMod, peAcc, pbScratch, cbScratch );
+
+        for (UINT32 i=0; i<nBases; i++)
+        {
+            SymCryptModExpWindowed( pmMod, peBaseArray[i], piExpArray[i], nBitsExp, peTemp, pbScratch, cbScratch );
+
+            SymCryptModMul( pmMod, peAcc, peTemp, peAcc, pbScratch, cbScratch );
+        }
+
+        // Copy the result into the destination
+        SymCryptModElementCopy( pmMod, peAcc, peDst );
+    }
+
+cleanup:
+    return scError;
+}
diff --git a/libs/symcrypt/lib/paddingPkcs7.c b/libs/symcrypt/lib/paddingPkcs7.c
new file mode 100644
index 00000000000..5d3a466b253
--- /dev/null
+++ b/libs/symcrypt/lib/paddingPkcs7.c
@@ -0,0 +1,167 @@
+//
+// paddingPkcs7.c   Add/Remove PKCS7 padding
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+
+
+VOID
+SYMCRYPT_CALL
+SymCryptPaddingPkcs7Add(    // Copies pbSrc to pbDst (unless same buffer) and appends 1..cbBlockSize bytes, each equal to the pad length
+                                        SIZE_T  cbBlockSize,
+    _In_reads_(cbSrc)                   PCBYTE  pbSrc,
+                                        SIZE_T  cbSrc,
+    _Out_writes_to_(cbDst, *pcbResult)  PBYTE   pbDst,
+                                        SIZE_T  cbDst,
+                                        SIZE_T* pcbResult)
+{
+    SIZE_T cbPadVal;            // PadVal is the number of bytes to pad.
+    SIZE_T cbDataLastBlock;     // cbDataLastBlock is the number of bytes of data at the final block.
+    SIZE_T cbResult = 0;        // This variable must always have a valid value when we finish the function.
+
+    SYMCRYPT_ASSERT(cbBlockSize < 256);                         // cbBlockSize must be < 256
+    SYMCRYPT_ASSERT((cbBlockSize & (cbBlockSize - 1)) == 0);    // cbBlockSize must be a power of 2
+
+    //
+    // Compute the padding parameters.
+    //
+
+    cbDataLastBlock = (cbSrc & (cbBlockSize - 1));
+
+    cbResult = (cbSrc - cbDataLastBlock + cbBlockSize);
+
+    SYMCRYPT_ASSERT(cbDst >= cbResult); // cbDst >= cbSrc - cbSrc % cbBlockSize + cbBlockSize
+
+    if (cbResult > cbDst)
+    {
+        goto cleanup;    // nothing is written to pbDst; *pcbResult still reports the size that would be needed
+    }
+
+    cbPadVal = (cbBlockSize - cbDataLastBlock);
+
+    //
+    // perform the padding
+    //
+
+    // cbSrc must be greater than zero. memcpy(pbDst, NULL, 0) is not defined!
+    if (pbDst != pbSrc && cbSrc > 0)
+    {
+        memcpy(pbDst, pbSrc, cbSrc);
+    }
+
+    memset(pbDst + cbSrc, (int)cbPadVal, cbPadVal);
+
+cleanup:
+    *pcbResult = cbResult;
+}
+
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptPaddingPkcs7Remove(    // Copies the message part of pbSrc to pbDst and validates the padding using masks instead of branches
+                                        SIZE_T  cbBlockSize,
+    _In_reads_(cbSrc)                   PCBYTE  pbSrc,
+                                        SIZE_T  cbSrc,
+    _Out_writes_to_(cbDst, *pcbResult)  PBYTE   pbDst,
+                                        SIZE_T  cbDst,
+                                        SIZE_T* pcbResult)
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    UINT32 mPaddingError = 0;       // Indicates whether there is an error in padding or not.
+    UINT32 mBufferSizeError = 0;    // Indicates whether pbDst is large enough to contain the entire message.
+    UINT32 mask = 0;                // Mask for message bytes at the final block.
+    UINT32 cbPadVal;                // PadVal is the number of padded bytes.
+    UINT32 cbSrc32;
+    UINT32 cbDst32;
+    UINT32 cbMsg32;
+
+    SIZE_T cbBulk = 0;
+    SIZE_T cbResult;                // This variable must always have a valid value when we finish the function.
+
+
+    SYMCRYPT_ASSERT(cbBlockSize < 256);                         // cbBlockSize must be < 256
+    SYMCRYPT_ASSERT((cbBlockSize & (cbBlockSize - 1)) == 0);    // cbBlockSize must be a power of 2
+    SYMCRYPT_ASSERT((cbSrc & (cbBlockSize - 1)) == 0);          // cbSrc is a multiple of cbBlockSize
+    SYMCRYPT_ASSERT(cbSrc > 0);                                 // cbSrc is greater than zero
+
+    cbPadVal = (UINT32)pbSrc[cbSrc - 1];
+
+    // check the Padding to make sure it is valid.
+    mPaddingError |= SymCryptMask32IsZeroU31(cbPadVal) | SymCryptMask32LtU31((UINT32)cbBlockSize, cbPadVal);
+
+    // If cbPadVal is greater than cbSrc, SYMCRYPT_INVALID_ARGUMENT will be returned
+    // and cbResult will not be the right value.
+    cbResult = cbSrc - cbPadVal;
+
+    //
+    // Bulk processing
+    //
+
+    cbDst = SYMCRYPT_MIN(cbDst, cbSrc);
+
+    cbBulk = cbSrc - cbBlockSize;
+
+    // cbSrc, cbDst, and blockSize are not secrets.
+    // This condition can be checked in a non-side channel safe way.
+    if (cbDst < cbBulk)
+    {
+        scError = SYMCRYPT_BUFFER_TOO_SMALL;
+        goto cleanup;
+    }
+
+    if (pbDst != pbSrc)
+    {
+        memcpy(pbDst, pbSrc, cbBulk);
+    }
+
+    // Updating parameters
+    pbSrc += cbBulk; cbSrc -= cbBulk;
+    pbDst += cbBulk; cbDst -= cbBulk;
+
+    cbSrc32 = (UINT32)cbSrc;    // NOTE(review): cbSrc32 is not read again in this function
+    cbDst32 = (UINT32)cbDst;
+
+    //
+    // Validating padding
+    //
+    // If cbPadVal is greater than cbBlockSize,
+    // we have to limit cbPadVal to be at most equal to cbBlockSize.
+    cbPadVal = 1 + ((cbPadVal - 1) & (cbBlockSize - 1));
+    cbMsg32 = (UINT32)(cbBlockSize - cbPadVal);
+
+    // Check the Dst buffer length to make sure it is possible to copy the whole message (not including the padding).
+    mBufferSizeError |= SymCryptMask32LtU31(cbDst32, cbMsg32);
+
+    //
+    // Final Block processing
+    //
+
+    // Updating only the bytes of the message and leaving the other bytes in pbDst unchanged.
+    // Validating the value of the padded bytes.
+
+    for (UINT32 i = 0; i < cbBlockSize; ++i)    // cbDst <= cbSrc == cbBlockSize
+    {
+        mask = SymCryptMask32LtU31(i, cbMsg32);
+
+        mPaddingError |= (SymCryptMask32IsNonzeroU31((UINT32)pbSrc[i] ^ cbPadVal) & ~mask);
+
+        if (i < cbDst)
+        {
+            pbDst[i] ^= (pbDst[i] ^ pbSrc[i]) & mask;
+        }
+    }
+
+cleanup:
+
+    *pcbResult = cbResult;
+
+    // Update scError with the two error masks.
+    // SYMCRYPT_INVALID_ARGUMENT gets precedence over SYMCRYPT_BUFFER_TOO_SMALL
+    scError ^= mBufferSizeError & (scError ^ SYMCRYPT_BUFFER_TOO_SMALL);
+    scError ^= mPaddingError & (scError ^ SYMCRYPT_INVALID_ARGUMENT);
+
+    return scError;
+}
diff --git a/libs/symcrypt/lib/parhash.c b/libs/symcrypt/lib/parhash.c
new file mode 100644
index 00000000000..ee933e1fd52
--- /dev/null
+++ b/libs/symcrypt/lib/parhash.c
@@ -0,0 +1,517 @@
+//
+// ParHash.c
+// Code shared with all the parallel hash implementations
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptParallelHashProcess_serial(    // Applies the operations one at a time, in array order, to the hash state each one names
+    _In_                                                            PCSYMCRYPT_PARALLEL_HASH            pParHash,
+    _Inout_updates_bytes_( nStates * pParHash->pHash->stateSize )   PVOID                               pStates,
+                                                                    SIZE_T                              nStates,
+    _Inout_updates_( nOperations )                                  PSYMCRYPT_PARALLEL_HASH_OPERATION   pOperations,
+                                                                    SIZE_T                              nOperations,
+    _Out_writes_( cbScratch )                                       PBYTE                               pbScratch,
+                                                                    SIZE_T                              cbScratch )
+{
+    SYMCRYPT_ERROR                      scError = SYMCRYPT_NO_ERROR;
+    SIZE_T                              i;
+    PSYMCRYPT_PARALLEL_HASH_OPERATION   op;
+    PCSYMCRYPT_HASH                     pHash;
+
+    pHash = pParHash->pHash;
+    op = pOperations;
+
+    //
+    // Wipe the scratch space to detect erroneous callers.
+    // We do this so that callers that test on a non-parallel platform will work on a platform that does support
+    // parallel operations.
+    //
+    if( cbScratch < pParHash->parScratchFixed + nStates * SYMCRYPT_PARALLEL_HASH_PER_STATE_SCRATCH )
+    {
+        scError = SYMCRYPT_BUFFER_TOO_SMALL;
+        goto cleanup;
+    }
+    SymCryptWipeKnownSize( pbScratch, pParHash->parScratchFixed + nStates * SYMCRYPT_PARALLEL_HASH_PER_STATE_SCRATCH );
+
+    for( i=0; i<nOperations; i++ )
+    {
+        if( op->iHash >= nStates )
+        {
+            scError = SYMCRYPT_INVALID_ARGUMENT;    // operations before this one have already been applied
+            goto cleanup;
+        }
+        switch( op->hashOperation )
+        {
+        case SYMCRYPT_HASH_OPERATION_APPEND:
+            (*pHash->appendFunc)( (PBYTE)pStates + pHash->stateSize * op->iHash, op->pbBuffer, op->cbBuffer );
+            break;
+
+        case SYMCRYPT_HASH_OPERATION_RESULT:
+            if( op->cbBuffer != pHash->resultSize )
+            {
+                scError = SYMCRYPT_INVALID_ARGUMENT;
+                goto cleanup;
+            }
+            (*pHash->resultFunc)( (PBYTE)pStates + pHash->stateSize * op->iHash, op->pbBuffer );
+            break;
+
+        default:
+            scError = SYMCRYPT_INVALID_ARGUMENT;
+            goto cleanup;
+        }
+        op++;
+    }
+
+cleanup:
+    return scError;
+}
+
+//
+// This function looks at a state and decides what to do.
+// If it returns FALSE, then this state is done and no further processing is required.
+// If it returns TRUE, the pbData/cbData have to be processed in parallel.
+// This function is called again on the same state after the pbData/cbData have been processed.
+//
+// Internally, it keeps track of the next step to be taken for this state.
+// the processingState keeps track of the next action to take.
+//
+
+//
+// An enum to keep track of the state of a request block (NOTE(review): no enum follows; the STATE_* values used below are not defined in this part of the file)
+//
+BOOLEAN
+SYMCRYPT_CALL
+SymCryptParallelHashSetNextWork( PCSYMCRYPT_PARALLEL_HASH pParHash, PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pScratch )
+{
+    PSYMCRYPT_COMMON_HASH_STATE         pState;
+    PCSYMCRYPT_HASH                     pHash;
+    PCSYMRYPT_PARALLEL_HASH_OPERATION   pOp;    // NOTE(review): type name is spelled PCSYMRYPT (no second 'C'); confirm against its declaration
+    SIZE_T                              bytesInBuffer;
+    SIZE_T                              todo;
+    BOOLEAN                             res;
+
+    // Retrieve the state we will operate on.
+    pState = (PSYMCRYPT_COMMON_HASH_STATE) pScratch->hashState;
+    pHash = pParHash->pHash;
+
+    //
+    // This is a state machine where some states have to iterate
+    // The loop allows them to use 'continue' for that.
+    //
+#pragma warning( suppress: 4127 )       // conditional expression is constant
+    while( TRUE )
+    {
+        //
+        // At this point, the processing state, pbData/cbData, and next pointer define what needs to be done.
+        // STATE_NEXT:          cbData == 0 and we have to process the remaining operations.
+        // STATE_DATA_START:    We are working on the next operation; the first BytesAlreadyProcessed have been hashed,
+        //                      and the hash state has an empty buffer.
+        // STATE_DATA_END:      We are working on the next operation (an append), and pbData/cbData have whatever partial block remains
+        //                      after all the whole blocks have been processed.
+        // STATE_PAD2:          We are working on the next operation (a result), and have processed the first half of a 2-block padding. (NOTE(review): the switch below uses STATE_RESULT2; presumably the same state, confirm)
+        // STATE_RESULT:        We are working on the next operation (a result), and have processed all the padding. (NOTE(review): the switch below uses STATE_RESULT_DONE; presumably the same state, confirm)
+        //
+        // The pState->dataLength is updated whenever we copy bytes from the append into the state's buffer, or when
+        // we return TRUE and process bulk data.
+        //
+        pOp = pScratch->next;
+        switch( pScratch->processingState )
+        {
+        case STATE_NEXT:
+
+            if( pOp == NULL )
+            {
+                return FALSE;
+            }
+
+            bytesInBuffer = pState->bytesInBuffer;
+
+            // SYMCRYPT_ASSERT( pOp->cbBuffer < ((SIZE_T)-1)/2 );  // used during testing
+
+            if( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_APPEND )
+            {
+                pState->dataLengthL += pOp->cbBuffer;
+                if( pState->dataLengthL < pOp->cbBuffer ) {
+                    pState->dataLengthH ++;                     // This is almost-unreachable code as it requires 2^64 bytes to be hashed.
+                }
+
+                if( bytesInBuffer > 0 )
+                {
+                    SYMCRYPT_ASSERT( pHash->inputBlockSize > bytesInBuffer );
+
+                    todo = SYMCRYPT_MIN( pHash->inputBlockSize - bytesInBuffer, pOp->cbBuffer );
+                    memcpy( &pState->buffer[bytesInBuffer], pOp->pbBuffer, todo );
+                    pState->bytesInBuffer += (UINT32) todo;
+                    if( pState->bytesInBuffer == pHash->inputBlockSize )
+                    {
+                        //
+                        // We filled the buffer; set it for processing.
+                        // Remember the # bytes we did and set the next state to process the rest of the request.
+                        //
+                        pScratch->pbData = &pState->buffer[0];
+                        pScratch->cbData = pHash->inputBlockSize;
+                        if( todo == pOp->cbBuffer )
+                        {
+                            //
+                            // We finished the request after the pbData processing
+                            //
+                            pScratch->next = pOp->next;
+                            // pScratch->processingState = STATE_NEXT       // already has that value
+                        } else {
+                            pScratch->processingState = STATE_DATA_START;
+                            SYMCRYPT_ASSERT( todo <= 0xff );
+                            pScratch->bytesAlreadyProcessed = (BYTE) todo;
+                        }
+
+                        pState->bytesInBuffer = 0;      // it will be after we process the block
+                        return TRUE;
+                    } else {
+                        //
+                        // We finished the operation; skip to the next one.
+                        //
+                        pScratch->next = pOp->next;
+                        // pScratch->processingState = STATE_NEXT       // already has that value
+                        continue;
+                    }
+                } else {
+                    //
+                    // Buffer is empty; process the bulk data
+                    //
+                    pScratch->pbData = pOp->pbBuffer;
+                    pScratch->cbData = pOp->cbBuffer;
+                    pScratch->processingState = STATE_DATA_END;
+
+                    //
+                    // Return TRUE if there is real data to process, and just re-run the state
+                    // machine if we should copy the partial block to the buffer.
+                    //
+                    if( pScratch->cbData >= pHash->inputBlockSize )
+                    {
+                        return TRUE;
+                    } else {
+                        continue;
+                    }
+                }
+            } else {
+                SYMCRYPT_ASSERT( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_RESULT );
+
+                if( (*pParHash->parResult1Func)( pParHash, pState, pScratch, &res ) )
+                {
+                    return res;
+                }
+            }
+            break;    // parResult1Func returned FALSE: go around the while loop again
+
+        case STATE_DATA_START:
+            //
+            // The next operation is an append, and the first few bytes of that operation have already been copied to
+            // the buffer and processed. We need to process the rest.
+            // Note that the # bytes remaining is never zero.
+            //
+            SYMCRYPT_ASSERT( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_APPEND && pOp->cbBuffer >= pScratch->bytesAlreadyProcessed );
+
+            pScratch->pbData = pOp->pbBuffer + pScratch->bytesAlreadyProcessed;
+            pScratch->cbData = pOp->cbBuffer - pScratch->bytesAlreadyProcessed;
+            if( pScratch->cbData >= pHash->inputBlockSize )
+            {
+                pScratch->processingState = STATE_DATA_END;
+                return TRUE;
+            }
+
+            //
+            // We have less than one block left; this is exactly the same state as we have at the end of
+            // a normal append. Fall through to that code.
+            //
+            // FALLTHROUGH!
+
+        case STATE_DATA_END:
+            //
+            // We finished processing the whole blocks of the pScratch->pbData, and have to process the rest.
+            // The current append is already popped off the work list.
+            //
+            if( pScratch->cbData > 0 )
+            {
+                SYMCRYPT_ASSERT( pScratch->cbData < pHash->inputBlockSize );
+                memcpy( &pState->buffer[0], pScratch->pbData, pScratch->cbData );
+                pState->bytesInBuffer = (UINT32) pScratch->cbData;
+            }
+            pScratch->next = pOp->next;
+            pScratch->processingState = STATE_NEXT;
+            continue;
+
+        case STATE_RESULT2:
+            if( (*pParHash->parResult2Func)( pParHash, pState, pScratch, &res ) )
+            {
+                return res;
+            }
+            continue;
+
+        case STATE_RESULT_DONE:
+
+            (*pParHash->parResultDoneFunc)( pParHash, pState, pOp );
+
+            pScratch->next = pOp->next;
+            pScratch->processingState = STATE_NEXT;
+            continue;
+        }
+    }
+
+    return FALSE;
+}
+
+
+//
+// Comparison function used to sort the work into largest-first order.
+//
+int SYMCRYPT_CDECL
+compareRequestSize( PCVOID p1, PCVOID p2 )
+{
+    PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pp1 = (PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE *) p1;
+    PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pp2 = (PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE *) p2;
+
+    UINT64 c1 = (*pp1)->bytes;
+    UINT64 c2 = (*pp2)->bytes;
+
+    //
+    // This is a 'reverse' compare function as we want the largest item first.
+    //
+    if( c1 < c2 )
+    {
+        return 1;
+    } else if( c1 > c2 )
+    {
+        return -1;
+    } else {
+        return 0;
+    }
+}
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptParallelHashProcess(
+    _In_                                                            PCSYMCRYPT_PARALLEL_HASH            pParHash,
+    _Inout_updates_bytes_( nStates * pParHash->pHash->stateSize )   PVOID                               pStates,
+                                                                    SIZE_T                              nStates,
+    _Inout_updates_( nOperations )                                  PSYMCRYPT_PARALLEL_HASH_OPERATION   pOperations,
+                                                                    SIZE_T                              nOperations,
+    _Out_writes_( cbScratch )                                       PBYTE                               pbScratch,
+                                                                    SIZE_T                              cbScratch,
+                                                                    UINT32                              maxParallel )
+{
+    SYMCRYPT_ERROR                              scError = SYMCRYPT_NO_ERROR;
+    PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE       pScratchState;
+    PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE *     pWork;
+    SIZE_T                                      nWork;
+    PSYMCRYPT_PARALLEL_HASH_OPERATION           pOp;
+    PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE       pSc;
+    SIZE_T                                      i;
+    UINT64                                      singleSize;
+    BOOLEAN                                     sameSize;
+    SIZE_T                                      nPar;
+    PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE *     pNextWork;
+    SIZE_T                                      todo;
+    SIZE_T                                      nBytes;
+    PBYTE                                       pbScratchEnd;
+    PBYTE                                       pbFixedScratch;
+    SIZE_T                                      cbFixedScratch;
+    PCSYMCRYPT_HASH                             pHash;
+
+    if( nOperations == 0 )
+    {
+        goto cleanup;
+    }
+
+    pHash = pParHash->pHash;
+
+    //
+    // The caller passes us a scratch buffer.
We split that into the following pieces: + // + // <alignment space to SYMCRYPT_ALIGN_VALUE> + // SYMCRYPT_PARALLEL_HASH_SCRATCH pScratchState[ nStates ] + // PSYMCRYPT_PARALLEL_HASH_SCRATCH pWork[ nStates ] + // <alignment space to SYMCRYPT_SIMD_ELEMENT_SIZE> + // scratch space for parallel function + // + + pbScratchEnd = pbScratch + cbScratch; + pScratchState = (PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE) SYMCRYPT_ALIGN_UP( pbScratch ); + pWork = (PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE *) (pScratchState + nStates); + pbFixedScratch = (PBYTE)((((SIZE_T)(pWork + nStates)) + SYMCRYPT_SIMD_ELEMENT_SIZE - 1) & ~(SYMCRYPT_SIMD_ELEMENT_SIZE - 1)); + cbFixedScratch = pParHash->parScratchFixed; + + if( pbFixedScratch + cbFixedScratch > pbScratchEnd ) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + + // + // Wipe the scratch state; this sets the pointers to NULL, and the byte counts to 0. + // + memset( pScratchState, 0, nStates * sizeof( *pScratchState )); + nWork = 0; + + // + // The general data structure is as follows. + // For each hash state, we keep our administration in the pScratchState[i]. This contains a pointer to the actual + // hash state, a pointer to a linked list of operations to be performed on this state, pointer/length of the + // current data to be processed, and a few more administrative items. + // We also keep the pWork array of pointers to our scratch states, which contains all the states that still need + // work to be done. + // + // We process over the operations in reverse order to make it easy to build a forward single-linked list + // + pOp = &pOperations[ nOperations ]; + while( pOp > pOperations ) + { + pOp--; + + if( pOp->iHash >= nStates ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pSc = &pScratchState[ pOp->iHash ]; + + if( pSc->hashState == NULL ) + { + // + // We found a new state that is being modified by this set of operations. + // Set the pointer to the hash state, and add it to the work list. 
+ // + SYMCRYPT_ASSERT( nWork < nStates ); + pSc->hashState = (PBYTE) pStates + pHash->stateSize * pOp->iHash; + pWork[nWork] = pSc; + nWork++; + } + + // + // We estimate how much work we have to do on each state, so that we can start on the largest ones + // and be more efficient. + // + if( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_APPEND ) + { + pSc->bytes += pOp->cbBuffer; + } else if( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_RESULT ) + { + // + // The result could be a 1 or 2-block operation; but it is mostly a 1-block one so that is what we budget. + // + pSc->bytes += pHash->inputBlockSize; + } else { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // + // Add the operation to the list of operations for this state + // + pOp->next = pSc->next; + pSc->next = pOp; + } + + // + // We have built all the structures. + // Run the SetNextWork on each of them, and drop the ones that don't have work. + // Also detect whether they are all the same size so that we can avoid the sorting cost. + // + SYMCRYPT_ASSERT( nWork > 0 ); + singleSize = (*pWork)->bytes; + sameSize = TRUE; + i = 0; + while( i < nWork ) + { + if( !SymCryptParallelHashSetNextWork( pParHash, pWork[i] ) ) + { + pWork[i] = pWork[nWork-1]; + nWork--; + continue; + } + + if( pWork[i]->bytes != singleSize ) + { + sameSize = FALSE; + } + i++; + } + + if( !sameSize ) + { + qsort( pWork, nWork, sizeof( *pWork ), &compareRequestSize ); + } + + nPar = SYMCRYPT_MIN( nWork, maxParallel ); // # parallel states we currently work on + pNextWork = pWork + nPar; // next work pointer. 
+ + while( nWork > 0 ) + { + todo = pWork[0]->cbData; + for( i=1; i<nPar; i++ ) + { + todo = SYMCRYPT_MIN( todo, pWork[i]->cbData ); + } + + nBytes = todo & ~((SIZE_T)(pHash->inputBlockSize - 1)); + + (*pParHash->parAppendFunc)( pWork, nPar, nBytes, pbFixedScratch, cbFixedScratch ); + + for( i=0; i<nPar; i++ ) + { + if( pWork[i]->cbData < pHash->inputBlockSize ) + { + // + // Once we start a request we finish it; this is not optimal. + // It would be better to switch things around a bit, but that is much more complicated. + // Example: suppose we can do 4-parallel and have requests of size + // 9 8 7 6 6 6 + // Our code does + // Process first 4 of # blocks Resulting state + // 9 8 7 6 / 6 6 6 3 2 1 - / 6 6 + // 6 3 2 1 / 6 1 5 2 1 0 / 6 + // 6 more to finish for a total of 13 blocks. + // + // Better would be: + // Process first 4 of # blocks Resulting state + // 9 8 7 6 / 6 6 6 3 2 1 - / 6 6 + // 6 6 3 2 / 1 - 2 4 4 1 0 / 1 + // 4 more to finish for total of 12 blocks. + // + // Or even better: + // Process first 4 of # blocks Resulting state + // 9 8 7 6 / 6 6 5 4 3 2 1 / 6 6 + // 6 6 4 3 / 2 1 3 3 3 1 - / 2 1 + // 3 3 2 1 / 1 - 1 2 2 1 - / 1 - + // 2 more to finish for a total of 11 blocks. + // Note that this last one requires the interruption of a started hash computation. + // + + if( !SymCryptParallelHashSetNextWork( pParHash, pWork[i] )) + { + if( nWork > nPar ) + { + pWork[i] = *pNextWork++; + nWork--; + } else { + // + // Ugly: copy the last item here, and wind back the loop counter + // by one so that we will process the last item again. + // + pWork[i] = pWork[ --nPar ]; + i--; + nWork--; + } + } + } + } + } + SymCryptWipe( pbFixedScratch, cbFixedScratch ); + +cleanup: + return scError; +} diff --git a/libs/symcrypt/lib/pbkdf2.c b/libs/symcrypt/lib/pbkdf2.c new file mode 100644 index 00000000000..3c1d06e4329 --- /dev/null +++ b/libs/symcrypt/lib/pbkdf2.c @@ -0,0 +1,126 @@ +// +// pbkdf2.c +// +// Copyright (c) Microsoft Corporation. 
Licensed under the MIT license. +// + +// +// This module contains the routines to implement the pbkdf2 function +// +// + +#include "precomp.h" + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptPbkdf2Derive( + _In_ PCSYMCRYPT_PBKDF2_EXPANDED_KEY pExpandedKey, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + UINT64 iterationCnt, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult) +{ + SYMCRYPT_MAC_STATE macState; + UINT32 iBlock; + SIZE_T bytes; + SIZE_T blockSize = pExpandedKey->macAlg->resultSize; + UINT64 iterations; + SYMCRYPT_ALIGN BYTE rbBlockResult[SYMCRYPT_MAC_MAX_RESULT_SIZE]; + SYMCRYPT_ALIGN BYTE rbWorkBuffer[SYMCRYPT_MAC_MAX_RESULT_SIZE]; + + SYMCRYPT_ASSERT( + blockSize <= SYMCRYPT_MAC_MAX_RESULT_SIZE && + cbResult > 0 ); + + if (iterationCnt == 0) + { + return SYMCRYPT_WRONG_ITERATION_COUNT; + } + + iBlock = 0; + while( cbResult > 0 ) + { + iBlock += 1; + SYMCRYPT_STORE_MSBFIRST32( &rbBlockResult[0], iBlock ); // use result buf as temp + + pExpandedKey->macAlg->initFunc ( &macState, &pExpandedKey->macKey); + pExpandedKey->macAlg->appendFunc( &macState, pbSalt, cbSalt); + pExpandedKey->macAlg->appendFunc( &macState, &rbBlockResult[0], 4 ); // block count encoded in 4 bytes + pExpandedKey->macAlg->resultFunc( &macState, rbWorkBuffer); + +#pragma warning(suppress: 22105) + memcpy( rbBlockResult, rbWorkBuffer, blockSize ); + for( iterations = 1; iterations < iterationCnt; iterations++ ) + { + pExpandedKey->macAlg->initFunc ( &macState, &pExpandedKey->macKey ); + pExpandedKey->macAlg->appendFunc( &macState, rbWorkBuffer, blockSize ); + pExpandedKey->macAlg->resultFunc( &macState, rbWorkBuffer ); + SymCryptXorBytes( &rbWorkBuffer[0], &rbBlockResult[0], &rbBlockResult[0], blockSize ); + } + + bytes = SYMCRYPT_MIN( cbResult, blockSize ); + memcpy( pbResult, rbBlockResult, bytes ); + pbResult += bytes; + cbResult -= bytes; + } + + SymCryptWipeKnownSize( &rbWorkBuffer[0], sizeof( rbWorkBuffer ) ); + SymCryptWipeKnownSize( &rbBlockResult[0], sizeof( 
rbBlockResult ) ); + return SYMCRYPT_NO_ERROR; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptPbkdf2ExpandKey( + _Out_ PSYMCRYPT_PBKDF2_EXPANDED_KEY pExpandedKey, + _In_ PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ) +{ + SYMCRYPT_ASSERT( macAlgorithm->expandedKeySize <= sizeof( pExpandedKey->macKey ) ); + + pExpandedKey->macAlg = macAlgorithm; + return macAlgorithm->expandKeyFunc(&pExpandedKey->macKey, pbKey, cbKey ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptPbkdf2( + PCSYMCRYPT_MAC macAlgorithm, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_opt_(cbSalt) PCBYTE pbSalt, + SIZE_T cbSalt, + UINT64 iterationCnt, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult) +{ + SYMCRYPT_PBKDF2_EXPANDED_KEY key; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + scError = SymCryptPbkdf2ExpandKey( &key, macAlgorithm, pbKey, cbKey ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + scError = SymCryptPbkdf2Derive( &key, pbSalt, cbSalt, iterationCnt, pbResult, cbResult ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + + SymCryptWipeKnownSize( &key, sizeof( key ) ); + + return scError; + +} + +// +// Self tests are in pbkdf_*.c files +// to avoid pulling in SHA-1 when only PBKDF-SHA256 is used and +// similar scenarios. +// diff --git a/libs/symcrypt/lib/pbkdf2_hmacsha1.c b/libs/symcrypt/lib/pbkdf2_hmacsha1.c new file mode 100644 index 00000000000..cf675a8b85e --- /dev/null +++ b/libs/symcrypt/lib/pbkdf2_hmacsha1.c @@ -0,0 +1,41 @@ +// +// pbkdf2_hmacsha1.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#include "precomp.h" + +// +// The PBKDF SHA-1 test +// This is in a separate module to avoid pulling in SHA-1 whenever we use PBKDF +// + +static const UINT64 pbkdf2_IterationCnt = 5; + +static const BYTE pbkdf2_sha1Answer[] = +{ + 0xef, 0xa9, 0xbf, 0xea, 0xa3, 0x4d, 0x70, 0x64, +}; + +VOID +SYMCRYPT_CALL +SymCryptPbkdf2_HmacSha1SelfTest(void) +{ + BYTE res[sizeof(pbkdf2_sha1Answer)]; + + SymCryptPbkdf2( + SymCryptHmacSha1Algorithm, + &SymCryptTestKey32[0], 8, + &SymCryptTestKey32[16], 16, + pbkdf2_IterationCnt, + res, + sizeof(res)); + + SymCryptInjectError( res, sizeof( res ) ); + + if (memcmp(res, pbkdf2_sha1Answer, sizeof(res)) !=0) + { + SymCryptFatal('Pbk2'); + } +} diff --git a/libs/symcrypt/lib/pbkdf2_hmacsha256.c b/libs/symcrypt/lib/pbkdf2_hmacsha256.c new file mode 100644 index 00000000000..ebef54bb81d --- /dev/null +++ b/libs/symcrypt/lib/pbkdf2_hmacsha256.c @@ -0,0 +1,41 @@ +// +// pbkdf2_hmacsha256.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// The PBKDF SHA-256 test +// This is in a separate module to avoid pulling in SHA-256 whenever we use PBKDF +// + +static const UINT64 pbkdf2_IterationCnt = 5; + +static const BYTE pbkdf2_sha256Answer[] = +{ + 0x05, 0x98, 0x1e, 0x89, 0x48, 0xd2, 0x84, 0x61, +}; + +VOID +SYMCRYPT_CALL +SymCryptPbkdf2_HmacSha256SelfTest(void) +{ + BYTE res[sizeof(pbkdf2_sha256Answer)]; + + SymCryptPbkdf2( + SymCryptHmacSha256Algorithm, + &SymCryptTestKey32[0], 8, + &SymCryptTestKey32[16], 16, + pbkdf2_IterationCnt, + res, + sizeof(res)); + + SymCryptInjectError( res, sizeof( res ) ); + + if (memcmp(res, pbkdf2_sha256Answer, sizeof(res)) !=0) + { + SymCryptFatal('Pbk2'); + } +} diff --git a/libs/symcrypt/lib/poly1305.c b/libs/symcrypt/lib/poly1305.c new file mode 100644 index 00000000000..286f023d6b8 --- /dev/null +++ b/libs/symcrypt/lib/poly1305.c @@ -0,0 +1,468 @@ +// +// Poly1305.c +// +// Copyright (c) Microsoft Corporation. 
Licensed under the MIT license. +// + +#include "precomp.h" + +VOID +SYMCRYPT_CALL +SymCryptPoly1305ProcessBlocks( + _Inout_ PSYMCRYPT_POLY1305_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptPoly1305( + _In_reads_( SYMCRYPT_POLY1305_KEY_SIZE ) PCBYTE pbKey, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_writes_( SYMCRYPT_POLY1305_RESULT_SIZE ) PBYTE pbResult ) +{ + SYMCRYPT_POLY1305_STATE state; + + SymCryptPoly1305Init( &state, pbKey ); + SymCryptPoly1305Append( &state, pbData, cbData ); + SymCryptPoly1305Result( &state, pbResult ); +} + +VOID +SYMCRYPT_CALL +SymCryptPoly1305Init( + _Out_ PSYMCRYPT_POLY1305_STATE pState, + _In_reads_( SYMCRYPT_POLY1305_KEY_SIZE ) PCBYTE pbKey ) +{ + pState->r[0] = SYMCRYPT_LOAD_LSBFIRST32( pbKey + 0 ) & 0x0fffffff; + pState->r[1] = SYMCRYPT_LOAD_LSBFIRST32( pbKey + 4 ) & 0x0ffffffc; + pState->r[2] = SYMCRYPT_LOAD_LSBFIRST32( pbKey + 8 ) & 0x0ffffffc; + pState->r[3] = SYMCRYPT_LOAD_LSBFIRST32( pbKey + 12 ) & 0x0ffffffc; + + pState->s[0] = SYMCRYPT_LOAD_LSBFIRST32( pbKey + 16 ); + pState->s[1] = SYMCRYPT_LOAD_LSBFIRST32( pbKey + 20 ); + pState->s[2] = SYMCRYPT_LOAD_LSBFIRST32( pbKey + 24 ); + pState->s[3] = SYMCRYPT_LOAD_LSBFIRST32( pbKey + 28 ); + + // Set accumulator to zero + SymCryptWipeKnownSize( &pState->a[0], sizeof( pState->a ) ); + + pState->bytesInBuffer = 0; +} + +VOID +SYMCRYPT_CALL +SymCryptPoly1305Append( + _Inout_ PSYMCRYPT_POLY1305_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SIZE_T nBytes; + SIZE_T bytesInBuffer; + + bytesInBuffer = pState->bytesInBuffer; + if( bytesInBuffer != 0 ) + { + // We have a partial block in the buffer, keep filling the block + + SYMCRYPT_ASSERT( bytesInBuffer < 16 ); + nBytes = 16 - bytesInBuffer; + if( nBytes > cbData ) + { + nBytes = cbData; + } + + memcpy( &pState->buf[bytesInBuffer], pbData, nBytes ); + pbData += nBytes; + cbData -= nBytes; + bytesInBuffer += nBytes; + + if( 
bytesInBuffer == 16 ) + { + // Buffer is full, process it and empty the buffer + SymCryptPoly1305ProcessBlocks( pState, pState->buf, bytesInBuffer ); + bytesInBuffer = 0; + } + pState->bytesInBuffer = bytesInBuffer; + } + + if( cbData >= 16 ) + { + // There are whole blocks to process + SymCryptPoly1305ProcessBlocks( pState, pbData, cbData & ~0xf ); + pbData += cbData; + cbData &= 0xf; + pbData -= cbData; + } + + if( cbData > 0 ) + { + // Copy remaining data to buffer + SYMCRYPT_ASSERT( cbData < 16 ); + memcpy( &pState->buf[0], pbData, cbData ); + pState->bytesInBuffer = cbData; + } +} + +VOID +SYMCRYPT_CALL +SymCryptPoly1305Result( + _Inout_ PSYMCRYPT_POLY1305_STATE pState, + _Out_writes_( SYMCRYPT_POLY1305_RESULT_SIZE ) PBYTE pbResult ) +{ + SIZE_T bytesInBuffer; + UINT64 t; + UINT32 a4, a3, a2, a1, a0; + UINT32 maskOld, maskNew; + + bytesInBuffer = pState->bytesInBuffer; + if( bytesInBuffer > 0 ) + { + // Add trailing '1' byte and pad with zeroes + // Wipe function deals with 0-length wipes properly + pState->buf[bytesInBuffer++] = 1; + SymCryptWipe( &pState->buf[bytesInBuffer], 16 - bytesInBuffer ); + + // Now we have to process the block, but the block function adds a trailing + // 1 byte to each 16-byte block. We compensate for that by decrementing + // the highest word of the accumulator first; the 1 byte added by the block + // processing function has the effect of incrementing the highest accumulator + // word so those two operations cancel each other out. 
+ pState->a[4] -= 1; + SymCryptPoly1305ProcessBlocks( pState, pState->buf, 16 ); + } + + // We have to fully reduce the accumulator first + // We have a[4]<6 at this point + a0 = pState->a[0]; + a1 = pState->a[1]; + a2 = pState->a[2]; + a3 = pState->a[3]; + a4 = pState->a[4]; + + SYMCRYPT_ASSERT( a4 < 6 ); + // Because a4 < 6, we have to subtract either 0*P or 1*P + // we subtract P and them mux-choose between the new and old value + // Subtracting P is the same as subtracting 2^130 and adding 5 + t = 5; + + t += a0; + a0 = (UINT32) t; + t >>= 32; + + t += a1; + a1 = (UINT32) t; + t >>= 32; + + t += a2; + a2 = (UINT32) t; + t >>= 32; + + t += a3; + a3 = (UINT32) t; + t >>= 32; + + t += a4; + t -= 4; + a4 = (UINT32) t; + t >>= 32; + + // If this subtraction produced a carry, then t = 0xffffffff, otherwise it is 0 + maskOld = (UINT32) t; // ffffffff if the old value is correct, 0 otherwise + maskNew = ~maskOld; // ffffffff if the new value is correct, 0 otherwise + + a0 = (maskNew & a0) | (maskOld & pState->a[0]); + a1 = (maskNew & a1) | (maskOld & pState->a[1]); + a2 = (maskNew & a2) | (maskOld & pState->a[2]); + a3 = (maskNew & a3) | (maskOld & pState->a[3]); + // a4 = (maskNew & a4) | (maskOld & pState->a[4]); // We don't need a4... + + // Now we add S and return the data + t = a0; + t += pState->s[0]; + SYMCRYPT_STORE_LSBFIRST32( pbResult + 0, (UINT32) t ); + t >>= 32; + + t += a1; + t += pState->s[1]; + SYMCRYPT_STORE_LSBFIRST32( pbResult + 4, (UINT32) t ); + t >>= 32; + + t += a2; + t += pState->s[2]; + SYMCRYPT_STORE_LSBFIRST32( pbResult + 8, (UINT32) t ); + t >>= 32; + + t += a3; + t += pState->s[3]; + SYMCRYPT_STORE_LSBFIRST32( pbResult + 12, (UINT32) t ); + + SymCryptWipeKnownSize( (PBYTE) pState, sizeof( *pState ) ); +} + + +/* +The heart of Poly1305 is a modular multiplication. +The modulus P := 2^130 - 5 + +One multiplicant is R which is part of the key. R is restricted to a subset of all possible +values ("clamped") to make the computation faster. 
+The other multiplicand is the accumulator A. The overall operation is + + A += <value derived from the data> + A = (A*R) mod P + +We write all values base 2^32: +b := 2^32 +A = a4 b^4 + a3 b^3 + a2 b^2 + a1 b + a0 +R = r3 b^3 + r2 b^2 + r1 b + r0 + +Fully reduced we would have a4 <= 3 but we don't store A in fully-reduced form. Instead +we maintain a4 < L with L:=8. + +The restrictions on R are: + r3, r2, r1, r0 < 2^28 + r3, r2, r1 are multiples of 4 + +The core algorithm looks like this (explanations below) + + + a4 a3 a2 a1 a0 + r3 r2 r1 r0 * +--------------------------------------- + a4r0 a3r0 a2r0 a1r0 a0r0 + a4r1 a3r1 a2r1 a1r1 a0r1 + a4r2 a3r2 a2r2 a1r2 a0r2 +a4r3 a3r3 a2r3 a1r3 a0r3 + +---------------------------------------- + S7 S6 S5 S4 S3 S2 S1 S0 + + S7 S6 S5 T4+U T3 S2 S1 S0 + + T3 S2 S1 S0 + S7 S6 S5 T4 + S7/4 S6/4 S5/4 T4/4 + + ------------------- + U V3 V2 V1 V0 + +At the top you see A and R with the 5*4 digit products arranged in columns. +The S values are the sums of the product columns without any carries. +Because the r values are <2^28 and a4 < L we have + + S0 <= 1*(2^32-1)(2^28-1) + S1 <= 2*(2^32-1)(2^28-1) + S2 <= 3*(2^32-1)(2^28-1) + S3 <= 4*(2^32-1)(2^28-1) + S4 <= 3*(2^32-1)(2^28-1) + (L-1)*(2^28-1) + S5 <= 2*(2^32-1)(2^28-1) + (L-1)*(2^28-1), multiple of 4 + S6 <= 1*(2^32-1)(2^28-1) + (L-1)*(2^28-1), multiple of 4 + S7 <= (L-1)*(2^28-1), multiple of 4 + +The next line defines T4, U, and T3 by +T3 := S3 mod b the lower word of S3 +T := S4 + floor(S3/b) add the upper word of S3 to S4 +U := T mod 4 +T4 := T - U Split T into a small value U and a bigger T4 that is a multiple of 4 + +note that the digits (S7,S6, S5, S4, S3, S2, S1, S0) and (S7, S6, S5, T4+U, T3, S2, S1, S0) +encode the same number, namely the result of the multiplication.
+ +We have bounds + floor(S3/b) <= 2^2 * (2^32-1) * (2^28-1) / 2^32 < 2^30 + T < 3*(2^32-1)(2^28-1) + (L-1)*(2^28-1) + 2^30 + U < 4 + T4 < 3*(2^32-1)(2^28-1) + (L-1)*(2^28-1) + 2^30, multiple of 4 + +Now we are ready to perform the modulo reduction. Because P = 2^130 - 5 we have for any value X + X*2^130 mod P = 5*X mod P +because 2^130 = 5 mod P +Or, if X is a multiple of 4 then + X*2^128 = (X + X/4) mod P +(this is just the previous equation divided by 4) +We apply that to S7, S6, S5, and T4 and add them (column wise) to (T3, S2, S1, S0) to get + +V0 := S0 + T4 + T4/4 +V1 := S1 + S5 + S5/4 +V2 := S2 + S6 + S6/4 +V3 := T3 + S7 + S7/4 + +and note that (U, V3, V2, V1, V0) is equal to the result of the multiplication modulo P +We can derive some bounds on these values + + We assume L <= 8 (will get strict bound later) + + V0 < 1*(2^32-1)(2^28-1) + 3*(2^32-1)(2^28-1) + (L-1)*(2^28-1) + 2^30 + (3*(2^32-1)(2^28-1) + (L-1)*(2^28-1) + 2^30)/4 + = 4*1*(2^32-1)(2^28-1) + (L-1)*(2^28-1) + 2^30 + (3*(2^32-1)(2^28-1) + (L-1)*(2^28-1) + 2^30)/4 + < 2^2 * 2^32 * 2^28 + 2^31 + 2^30 + (2^2 * 2^32 * 2^28 + 2^31 + 2^30)/4 + = 2^62 + 2^31 + 2^30 + 2^60 + 2^29 + 2^28 + < 2^63 + + V1 < 2*2^60 + 2*2^60 + 2^31 + (2*2^60 + 2^31)/4 < 2^63 + + V2 < 3*2^60 + 2^60 + 2^31 + (2^60 + 2^31)/4 < 2^63 + + V3 < 2^32 + 2^31 + 2^29 < 2*2^32 + + U < 4 + +So all the V values fit in 64 bits. A final carry propagation pass cleans this up to an array of 32-bit values which become +the new accumulator value. (During carry propagation the 32-bit carry from the lower digit can be added to the higher digit +because the V values are less than 2^63.) + +V3 < 2*2^32 and after adding at most 2^32 from a carry it is < 3*2^32 so the carry from V3 to U is at most 2. +Thus the highest digit of the accumulator can be at most 3 + 2 = 5. This ensures a4<L for L>=6. We assumed L<=8 before, so +L=6 works and satisfies the earlier assumption.
+ +To clarify the logic: IF a4<8 at the start of the multiplication THEN a4<6 after this function. Between multiplications we add +a value < 2^129 which could result in adding 2 to a4, but as a4<6 before the addition the a4<8 before the multiplication +is still satisfied. +*/ + +VOID +SYMCRYPT_CALL +SymCryptPoly1305ProcessBlocks( + _Inout_ PSYMCRYPT_POLY1305_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +// This is the portable C implementation, based on 32-bit operations. +// If necessary, we'll add assembler code for this function later. +{ + UINT32 a0, a1, a2, a3, a4; + UINT32 r0, r1, r2, r3; + UINT64 t64; + UINT32 T3; + UINT32 V0, V1, V2; + UINT32 cy; + UINT32 U; + UINT32 t32; + + r0 = pState->r[0]; + r1 = pState->r[1]; + r2 = pState->r[2]; + r3 = pState->r[3]; + + a0 = pState->a[0]; + a1 = pState->a[1]; + a2 = pState->a[2]; + a3 = pState->a[3]; + a4 = pState->a[4]; + + // Here we have a4 < 6, but we sometimes decrement a4 to compensate for the + // 2^128 this function always adds. So we test a4 + 1 < 7 + SYMCRYPT_ASSERT( a4 + 1 < 7 ); + + while( cbData >= 16 ) + { + // Acc += data[0..15] + 2^128 + t64 = (UINT64) a0 + SYMCRYPT_LOAD_LSBFIRST32( pbData + 0 ); + a0 = (UINT32) t64; + t64 >>= 32; + + t64 += (UINT64) a1 + SYMCRYPT_LOAD_LSBFIRST32( pbData + 4 ); + a1 = (UINT32) t64; + t64 >>= 32; + + t64 += (UINT64) a2 + SYMCRYPT_LOAD_LSBFIRST32( pbData + 8 ); + a2 = (UINT32) t64; + t64 >>= 32; + + t64 += (UINT64) a3 + SYMCRYPT_LOAD_LSBFIRST32( pbData + 12 ); + a3 = (UINT32) t64; + t64 >>= 32; + + a4 = (UINT32) t64 + a4 + 1; // +1 is the padding '1' which we always apply + SYMCRYPT_ASSERT( a4 < 8 ); + + pbData += 16; + cbData -=16; + + // Compute S3 + t64 = SYMCRYPT_MUL32x32TO64( a3, r0 ) + + SYMCRYPT_MUL32x32TO64( a2, r1 ) + + SYMCRYPT_MUL32x32TO64( a1, r2 ) + + SYMCRYPT_MUL32x32TO64( a0, r3 ); + + SYMCRYPT_ASSERT( t64 < (1ULL << 62) ); + + T3 = (UINT32) t64; + t64 >>= 32; + + // Compute T = S4 + floor(S3/2^32). 
We have the floor part in t64 already + // now add S4 to it + t64 += a4*r0 // this fits in 32 bits as r0 < 2^28 and a4 < 8 + + SYMCRYPT_MUL32x32TO64( a3, r1 ) + + SYMCRYPT_MUL32x32TO64( a2, r2 ) + + SYMCRYPT_MUL32x32TO64( a1, r3 ); + + U = (UINT32) t64 & 3; + t64 &= ~3; // t64 = T4 here + + // Compute S0 + T4 + T4/4, and V0 + t64 += (t64 >> 2) + SYMCRYPT_MUL32x32TO64( a0, r0 ); + V0 = (UINT32)t64; + cy = (UINT32)(t64 >> 32); // the carry from S0 to S1 + + // Compute S5 + t64 = a4 * r1 + SYMCRYPT_MUL32x32TO64( a3, r2 ) + SYMCRYPT_MUL32x32TO64( a2, r3 ); + t64 += t64 >> 2; // = S5 + S5/4 + + t64 += SYMCRYPT_MUL32x32TO64( a1, r0 ) + SYMCRYPT_MUL32x32TO64( a0, r1 ); + // t64 = S1 + S5 + S5/4 + + t64 += cy; + V1 = (UINT32) t64; + cy = (UINT32)(t64 >> 32); // the carry from S1 to S2 + + // Compute S6 + t64 = a4 * r2 + SYMCRYPT_MUL32x32TO64( a3, r3 ); + t64 += t64 >> 2; // S6 + S6/4 + + // now add S2 + t64 += SYMCRYPT_MUL32x32TO64( a2, r0 ) + SYMCRYPT_MUL32x32TO64( a1, r1 ) + SYMCRYPT_MUL32x32TO64( a0, r2 ); + t64 += cy; + V2 = (UINT32) t64; + cy = (UINT32)(t64 >> 32); + + // Finally T3 + S7 + S7/4 + t32 = a4 * r3; // =S7, a 32-bit value + t32 += t32/4; + t64 = (UINT64) T3 + t32; + t64 += cy; + + a0 = V0; + a1 = V1; + a2 = V2; + a3 = (UINT32) t64; + a4 = U + (UINT32)(t64 >> 32); + + SYMCRYPT_ASSERT( a4 < 6 ); + } + + pState->a[0] = a0; + pState->a[1] = a1; + pState->a[2] = a2; + pState->a[3] = a3; + pState->a[4] = a4; +} + + +static const BYTE poly1305Kat[16] = { + 0xef, 0x9e, 0x73, 0x2a, 0x7f, 0x2d, 0xf1, 0x85, 0xa7, 0x11, 0x80, 0xae, 0x58, 0x3a, 0x0f, 0x93, +}; + + +VOID +SYMCRYPT_CALL +SymCryptPoly1305Selftest(void) +{ + BYTE res[16]; + + SymCryptPoly1305( SymCryptTestKey32, SymCryptTestMsg16, 16, res ); + + SymCryptInjectError( res, sizeof( res ) ); + + if( memcmp( res, poly1305Kat, sizeof( res ) ) != 0 ) + { + SymCryptFatal( 'p135'); + } +} diff --git a/libs/symcrypt/lib/precomp.h b/libs/symcrypt/lib/precomp.h new file mode 100644 index 00000000000..2b8f5dc710a --- 
/dev/null +++ b/libs/symcrypt/lib/precomp.h @@ -0,0 +1,26 @@ +// +// SymCrypt library pre-compiled header file +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#ifdef __cplusplus +#error C++ +#endif + +#include <stdlib.h> +#include <string.h> + +#include "symcrypt.h" +#include "sc_lib.h" + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 +#include <wmmintrin.h> +#include <immintrin.h> + + #if SYMCRYPT_GNUC + #include <x86intrin.h> // required for definition of _rdseed64_step for GCC 8 and earlier + #include <xsaveintrin.h> + #define _XCR_XFEATURE_ENABLED_MASK 0 + #endif +#endif diff --git a/libs/symcrypt/lib/primes.c b/libs/symcrypt/lib/primes.c new file mode 100644 index 00000000000..6c9fcbee96f --- /dev/null +++ b/libs/symcrypt/lib/primes.c @@ -0,0 +1,306 @@ +// +// primes.c +// Primality tests and prime number generation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +UINT32 +SYMCRYPT_CALL +SymCryptIntMillerRabinPrimalityTest( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 nBitsSrc, + UINT32 nIterations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + BOOLEAN innerLoop = TRUE; + UINT32 borrow = 0; + + UINT32 nDigitsSrc = 0; + + UINT32 R = 1; + PSYMCRYPT_INT piD = NULL; + UINT32 cbD = 0; + PSYMCRYPT_MODULUS pmModulus = NULL; + UINT32 cbModulus = 0; + PSYMCRYPT_MODELEMENT peX = NULL; + UINT32 cbX = 0; + + PSYMCRYPT_MODELEMENT peOne = NULL; + PSYMCRYPT_MODELEMENT peMinOne = NULL; + + nDigitsSrc = SymCryptIntDigitsizeOfObject( piSrc ); + cbD = SymCryptSizeofIntFromDigits( nDigitsSrc ); + cbModulus = SymCryptSizeofModulusFromDigits( nDigitsSrc ); + + SYMCRYPT_ASSERT( nBitsSrc >= SymCryptIntBitsizeOfValue( piSrc ) ); + + SYMCRYPT_ASSERT( cbScratch >= cbModulus + SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS(nDigitsSrc) ); + + // Allocate the modulus + pmModulus = SymCryptModulusCreate( pbScratch, cbModulus, nDigitsSrc ); + SYMCRYPT_ASSERT( pmModulus 
!= NULL ); + pbScratch += cbModulus; + cbScratch -= cbModulus; + + // Set the modulus + SymCryptIntToModulus( + piSrc, + pmModulus, + nBitsSrc, // Average number of expected operations + SYMCRYPT_FLAG_MODULUS_PARITY_PUBLIC, + pbScratch, + cbScratch ); + + // Modelement size + cbX = SymCryptSizeofModElementFromModulus( pmModulus ); + + SYMCRYPT_ASSERT( cbScratch >= 3*cbX + cbD + + SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( nDigitsSrc ) ); + + peX = SymCryptModElementCreate( pbScratch, cbX, pmModulus ); + SYMCRYPT_ASSERT( peX != NULL ); + pbScratch += cbX; + cbScratch -= cbX; + + peOne = SymCryptModElementCreate( pbScratch, cbX, pmModulus ); + SYMCRYPT_ASSERT( peOne != NULL ); + pbScratch += cbX; + cbScratch -= cbX; + + peMinOne = SymCryptModElementCreate( pbScratch, cbX, pmModulus ); + SYMCRYPT_ASSERT( peMinOne != NULL ); + pbScratch += cbX; + cbScratch -= cbX; + + // Allocate D + piD = SymCryptIntCreate( pbScratch, cbD, nDigitsSrc ); + SYMCRYPT_ASSERT( piD != NULL ); + pbScratch += cbD; + cbScratch -= cbD; + + // Calculate (piSrc - 1) + // Note: We should never get a borrow here because the requirement + // is that Src > 3. 
+ SymCryptIntCopy( piSrc, piD ); + borrow = SymCryptIntSubUint32( piD, 1, piD ); + SYMCRYPT_ASSERT( borrow==0 ); + + SYMCRYPT_ASSERT( SymCryptIntGetBit( piD, 0 ) == 0 ); + + // Check the 3 mod 4 requirement when side-channel safe + SYMCRYPT_ASSERT( + ((flags & SYMCRYPT_FLAG_DATA_PUBLIC) != 0) || + (SymCryptIntGetBit( piD, 1 )!=0) ); + UNREFERENCED_PARAMETER( flags ); + + // Calculate R and D such that Src - 1 = D*2^R + // Notice that the loop executes only if + // the SYMCRYPT_FLAG_DATA_PUBLIC is + // specified (and Src != 3 mod 4) + R = 1; + while( SymCryptIntGetBit( piD, R )==0 ) + { + R++; + } + SymCryptIntDivPow2( piD, R, piD ); + + // Set peOne and peMinOne + SymCryptModElementSetValueUint32( 1, pmModulus, peOne, pbScratch, cbScratch ); + SymCryptModElementSetValueNegUint32( 1, pmModulus, peMinOne, pbScratch, cbScratch ); + + for (UINT32 i=0; i<nIterations; i++) + { + // Pick a random X in [2, piSrc-2] + // Therefore the flags parameter is 0 (default: not allowed 0, 1, -1 when modulus > 3) + SymCryptModSetRandom( pmModulus, peX, 0, pbScratch, cbScratch ); + + // X^D mod piSrc + // Notice that nBitsSrc is public in the call of SymCryptModExp + SymCryptModExp( pmModulus, peX, piD, nBitsSrc, 0, peX, pbScratch, cbScratch ); + + // Check for 1 or -1 + if ( SymCryptModElementIsEqual( pmModulus, peX, peOne ) | + SymCryptModElementIsEqual( pmModulus, peX, peMinOne ) ) + { + continue; + } + + // repeat R-1 times + // Notice that the inner loop executes only if + // the SYMCRYPT_FLAG_DATA_PUBLIC is + // specified (and Src != 3 mod 4) + innerLoop = TRUE; + for (UINT32 j=0; (j<R-1)&&(innerLoop); j++) + { + // Square X + SymCryptModSquare( pmModulus, peX, peX, pbScratch, cbScratch ); + + // Check if it is 1 + if (SymCryptModElementIsEqual( pmModulus, peX, peOne )) + { + return 0x0; + } + + // Check if it is -1 + if (SymCryptModElementIsEqual( pmModulus, peX, peMinOne )) + { + innerLoop = FALSE; + break; + } + } + + if (innerLoop) + { + return 0x0; + } + } + + return 
0xffffffff; // Prime +} + +#define SYMCRYPT_PRIME_GENERATION_MR_ITERATIONS (64) // Miller-Rabin iteration count passed to SymCryptIntMillerRabinPrimalityTest for each candidate + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptIntGenerateRandomPrime( + _In_ PCSYMCRYPT_INT piLow, + _In_ PCSYMCRYPT_INT piHigh, + _In_reads_opt_( nPubExp ) PCUINT64 pu64PubExp, + UINT32 nPubExp, + UINT32 nTries, + UINT32 flags, + _Inout_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_EXTERNAL_FAILURE; + PSYMCRYPT_DIVISOR pdPubExp[ SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS ]; + PSYMCRYPT_INT piTmp; + + UINT32 cnt = 0; + UINT32 e; + BOOLEAN reject; + SIZE_T cbObj; + + UINT32 nBits = SymCryptIntBitsizeOfObject(piDst); + UINT32 nBytes = (nBits + 7)/8; + + UINT32 nBitsHigh = SymCryptIntBitsizeOfValue( piHigh ); + + PCSYMCRYPT_TRIALDIVISION_CONTEXT pTrialDivisionContext = SymCryptCreateTrialDivisionContext( SymCryptIntDigitsizeOfObject( piHigh ) ); + + SYMCRYPT_ASSERT( cbScratch >= SYMCRYPT_SCRATCH_BYTES_FOR_INT_PRIME_GEN( SymCryptIntDigitsizeOfObject( piDst ) ) ); + SYMCRYPT_ASSERT( nPubExp <= SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS ); + SYMCRYPT_ASSERT( SymCryptDigitsFromBits( 64 ) == 1 ); + + UNREFERENCED_PARAMETER( flags ); + + // Allocate divisor objects for each public exponent & initialize them + cbObj = SymCryptSizeofDivisorFromDigits( 1 ); + for( e = 0; e < nPubExp; e++ ) + { + SYMCRYPT_ASSERT( cbScratch >= cbObj ); + if( pu64PubExp[e] == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto exit; + } + + pdPubExp[e] = SymCryptDivisorCreate( pbScratch, cbObj, 1 ); + pbScratch += cbObj; + cbScratch -= cbObj; + + SymCryptIntSetValueUint64( pu64PubExp[e], SymCryptIntFromDivisor( pdPubExp[e] ) ); + SymCryptIntToDivisor( SymCryptIntFromDivisor( pdPubExp[e] ), pdPubExp[e], 1000, SYMCRYPT_FLAG_DATA_PUBLIC, pbScratch, cbScratch ); + } + + cbObj = SymCryptSizeofIntFromDigits( 1 ); + SYMCRYPT_ASSERT( cbScratch >= cbObj + nBytes ); + piTmp = SymCryptIntCreate( pbScratch, cbObj, 1 ); + pbScratch += cbObj; + cbScratch -= cbObj; + 
+ do + { + cnt++; + + scError = SymCryptCallbackRandom( pbScratch, nBytes ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto exit; + } + + scError = SymCryptIntSetValue( pbScratch, nBytes, SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, piDst ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto exit; + } + + // Set the integer to 3 mod 4 + SymCryptIntSetBits( piDst, 3, 0, 2 ); + + // Zero out the top bits above the upper limit + SymCryptIntModPow2( piDst, nBitsHigh, piDst ); + + // Check if it is in the correct range + if ( (SymCryptIntIsLessThan( piDst, piLow )) || + (!SymCryptIntIsLessThan( piDst, piHigh )) ) + { + continue; + } + + // Fast compositeness check + if( SymCryptIntFindSmallDivisor( pTrialDivisionContext, piDst, NULL, 0 ) != 0 ) + { + // We found a small divisor; it is not a prime + continue; + } + + // Check for compatibility with public exponents (if provided) + reject = FALSE; + for( e = 0; e < nPubExp; e++ ) + { + SymCryptIntDivMod( piDst, pdPubExp[e], NULL, piTmp, pbScratch, cbScratch ); + + // Check that e has a modular inverse mod P-1 + // If e and P-1 are coprime, or GCD( P-1, e ) == 1, then e^-1 exists + // We have (P mod e) in piTmp. 
+ // If piTmp == 0 then P is divisible by e, and will fail primality test - we don't care about the result of the GCD + // Otherwise, GCD( (P mod e)-1, e ) == GCD( P-1 mod e, e ) == GCD( P-1, e ) + // + // Note that if P-1 is a multiple of e then (P mod e)-1 == 0, and GCD( 0, e ) == e + if( SymCryptUint64Gcd( pu64PubExp[e], SymCryptIntGetValueLsbits64( piTmp ) - 1, SYMCRYPT_FLAG_GCD_INPUTS_NOT_BOTH_EVEN ) != 1 ) + { + // We can't continue the big loop from here :-( + reject = TRUE; + break; + } + } + if( reject ) + { + continue; + } + + // Primality check; on success go straight to exit so that a prime found on the last try (cnt == nTries) is not turned into a failure below + if (SymCryptIntMillerRabinPrimalityTest( piDst, nBitsHigh, SYMCRYPT_PRIME_GENERATION_MR_ITERATIONS, 0, pbScratch, cbScratch )) + { + scError = SYMCRYPT_NO_ERROR; + goto exit; + } + } + while (cnt<nTries); + + if (cnt>=nTries) // always true here: the loop only falls through once every try was rejected + { + scError = SYMCRYPT_INVALID_ARGUMENT; + } + +exit: + SymCryptFreeTrialDivisionContext( pTrialDivisionContext ); + return scError; +} diff --git a/libs/symcrypt/lib/rc2.c b/libs/symcrypt/lib/rc2.c new file mode 100644 index 00000000000..d17807fd0dc --- /dev/null +++ b/libs/symcrypt/lib/rc2.c @@ -0,0 +1,438 @@ +// +// Rc2.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This module contains the routines to implement RC2 from RFC 2268 +// +// This is a new implementation, based on the RFC specification +// and NOT based on the existing one in RSA32.lib, which is the one from RSA data security. 
+// + + +#include "precomp.h" + +const SYMCRYPT_BLOCKCIPHER SymCryptRc2BlockCipher_default = { + SymCryptRc2ExpandKey, // PSYMCRYPT_BLOCKCIPHER_EXPAND_KEY expandKeyFunc; + SymCryptRc2Encrypt, // PSYMCRYPT_BLOCKCIPHER_CRYPT encryptFunc; + SymCryptRc2Decrypt, // PSYMCRYPT_BLOCKCIPHER_CRYPT decryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB ecbEncryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_ECB ecbDecryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE cbcEncryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE cbcDecryptFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_MAC_MODE cbcMacFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_CRYPT_MODE ctrMsbFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmEncryptPartFunc; + NULL, // PSYMCRYPT_BLOCKCIPHER_AEADPART_MODE gcmDecryptPartFunc; + 8, // SIZE_T blockSize; + sizeof( SYMCRYPT_RC2_EXPANDED_KEY ), // SIZE_T expandedKeySize; // = sizeof( SYMCRYPT_XXX_EXPANDED_KEY ) +}; + +const PCSYMCRYPT_BLOCKCIPHER SymCryptRc2BlockCipher = &SymCryptRc2BlockCipher_default; + +/* + * + * constants + * + */ +static const BYTE PITABLE[] = +{ + 0xd9, 0x78, 0xf9, 0xc4, 0x19, 0xdd, 0xb5, 0xed, 0x28, 0xe9, 0xfd, 0x79, 0x4a, 0xa0, 0xd8, 0x9d, + 0xc6, 0x7e, 0x37, 0x83, 0x2b, 0x76, 0x53, 0x8e, 0x62, 0x4c, 0x64, 0x88, 0x44, 0x8b, 0xfb, 0xa2, + 0x17, 0x9a, 0x59, 0xf5, 0x87, 0xb3, 0x4f, 0x13, 0x61, 0x45, 0x6d, 0x8d, 0x09, 0x81, 0x7d, 0x32, + 0xbd, 0x8f, 0x40, 0xeb, 0x86, 0xb7, 0x7b, 0x0b, 0xf0, 0x95, 0x21, 0x22, 0x5c, 0x6b, 0x4e, 0x82, + 0x54, 0xd6, 0x65, 0x93, 0xce, 0x60, 0xb2, 0x1c, 0x73, 0x56, 0xc0, 0x14, 0xa7, 0x8c, 0xf1, 0xdc, + 0x12, 0x75, 0xca, 0x1f, 0x3b, 0xbe, 0xe4, 0xd1, 0x42, 0x3d, 0xd4, 0x30, 0xa3, 0x3c, 0xb6, 0x26, + 0x6f, 0xbf, 0x0e, 0xda, 0x46, 0x69, 0x07, 0x57, 0x27, 0xf2, 0x1d, 0x9b, 0xbc, 0x94, 0x43, 0x03, + 0xf8, 0x11, 0xc7, 0xf6, 0x90, 0xef, 0x3e, 0xe7, 0x06, 0xc3, 0xd5, 0x2f, 0xc8, 0x66, 0x1e, 0xd7, + 0x08, 0xe8, 0xea, 0xde, 0x80, 0x52, 0xee, 0xf7, 0x84, 0xaa, 0x72, 0xac, 0x35, 0x4d, 0x6a, 0x2a, + 0x96, 0x1a, 0xd2, 0x71, 0x5a, 0x15, 
0x49, 0x74, 0x4b, 0x9f, 0xd0, 0x5e, 0x04, 0x18, 0xa4, 0xec, + 0xc2, 0xe0, 0x41, 0x6e, 0x0f, 0x51, 0xcb, 0xcc, 0x24, 0x91, 0xaf, 0x50, 0xa1, 0xf4, 0x70, 0x39, + 0x99, 0x7c, 0x3a, 0x85, 0x23, 0xb8, 0xb4, 0x7a, 0xfc, 0x02, 0x36, 0x5b, 0x25, 0x55, 0x97, 0x31, + 0x2d, 0x5d, 0xfa, 0x98, 0xe3, 0x8a, 0x92, 0xae, 0x05, 0xdf, 0x29, 0x10, 0x67, 0x6c, 0xba, 0xc9, + 0xd3, 0x00, 0xe6, 0xcf, 0xe1, 0x9e, 0xa8, 0x2c, 0x63, 0x16, 0x01, 0x3f, 0x58, 0xe2, 0x89, 0xa9, + 0x0d, 0x38, 0x34, 0x1b, 0xab, 0x33, 0xff, 0xb0, 0xbb, 0x48, 0x0c, 0x5f, 0xb9, 0xb1, 0xcd, 0x2e, + 0xc5, 0xf3, 0xdb, 0x47, 0xe5, 0xa5, 0x9c, 0x77, 0x0a, 0xa6, 0x20, 0x68, 0xfe, 0x7f, 0xc1, 0xad +}; + +/* + * + * macros + * + */ + + +/* + * These are the original macros we derived directly from the RFC. + * To improve the perf we changed to using R0, R1, R2, R3 variables rather + * than an array. + */ + +/* +#define MIX(R, K, i, j, S) {\ + R[i] = R[i] + K[j] + (R[(i-1)&3] & R[(i-2)&3]) + ((~R[(i-1)&3]) & R[(i-3)&3]);\ + j = j + 1;\ + R[i] = ROL16(R[i], S);\ + } + +#define MIXROUND(R, K, j) {\ + MIX(R, K, 0, j, 1);\ + MIX(R, K, 1, j, 2);\ + MIX(R, K, 2, j, 3);\ + MIX(R, K, 3, j, 5);\ + } + +#define MASH(R, K, i) \ + R[i] = R[i] + K[R[(i-1)&3]&63]; + +#define MASHROUND(R, K) {\ + MASH(R, K, 0);\ + MASH(R, K, 1);\ + MASH(R, K, 2);\ + MASH(R, K, 3);\ + } + +// +// decrypt macros +// + + +#define RMIX(R, K, i, j, S) {\ + R[i] = ROR16( R[i], S );\ + R[i] = R[i] - K[j] - (R[(i-1)&3] & R[(i-2)&3]) - ((~R[(i-1)&3]) & R[(i-3)&3]);\ + j = j - 1;\ + } + +#define RMIXROUND(R, K, j) {\ + RMIX(R, K, 3, j, 5);\ + RMIX(R, K, 2, j, 3);\ + RMIX(R, K, 1, j, 2);\ + RMIX(R, K, 0, j, 1);\ + } + +#define RMASH(R, K, i) \ + R[i] = R[i] - K[R[(i-1)&3] & 63]; + +#define RMASHROUND(R, K) {\ + RMASH(R, K, 3);\ + RMASH(R, K, 2);\ + RMASH(R, K, 1);\ + RMASH(R, K, 0);\ + } +*/ + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRc2ExpandKeyEx( + _Out_ PSYMCRYPT_RC2_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey, + UINT32 
effectiveKeySizeInBits ) +{ + SYMCRYPT_ALIGN BYTE L[128]; + UINT32 T; + UINT32 T1; + UINT32 T8; + UINT32 TM; + int i; + + SYMCRYPT_SET_MAGIC( pExpandedKey ); + + // + // According to RFC 2268 any key size in 1..128 is allowed. + // + // The effective key size cannot be 0 as the RFC specs would lead to a buffer overflow + // in the key expansion. + // + // If the effective key size <= 8 then T8=1 and the key expansion backward recursion + // drops into a fixed point because L[i+1] xor L[i+T8] is zero. + // Therefore, we require an effective key size of at least 9. + // + if( cbKey < 1 || cbKey > 128 || effectiveKeySizeInBits < 9 || effectiveKeySizeInBits > 8*128 ) + { + return SYMCRYPT_WRONG_KEY_SIZE; + } + + T = (UINT32)cbKey; // 1 <= T1 <= 128 + + T1 = effectiveKeySizeInBits; // 9 <= T1 <= 1024 + T8 = (T1+7)/8; // 2 <= T8 <= 128 + + TM = 255 & ((1 << (8 + (UINT32)T1 - 8*T8))-1); + + // + // To be endian-agnostic our expanded key is stored as an array of UINT16s. We do the key + // expansion in a local buffer and copy the values into the expanded key using the proper conversion. + // + memcpy(L, pbKey, T); + + for(i = T; i <= 127; i++) + { + L[i] = PITABLE[(L[i-1]+L[i-T]) & 0xff]; + // + // If the key size T=1 then we lose one bit of key space in the key expansion because + // L[i-1] == L[i-T] which makes the index to PITABLE even. So L[1..128] depend only on + // 7 bits. + // + } + + L[128-T8] = PITABLE[L[128-T8] & TM]; + + for( i = 127-T8; i >=0; i--) + { + L[i] = PITABLE[L[i+1] ^ L[i+T8]]; + } + + // + // Now we copy the result into the UINT16 array in our expanded key. + // This is a memcpy for little-endian platforms, but this code works on all CPUs. 
+ // + for( i=0; i<64; i++ ) + { + pExpandedKey->K[i] = SYMCRYPT_LOAD_LSBFIRST16( &L[2*i] ); + } + + SymCryptWipeKnownSize( L, sizeof( L ) ); + + return SYMCRYPT_NO_ERROR; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRc2ExpandKey( + _Out_ PSYMCRYPT_RC2_EXPANDED_KEY pExpandedKey, + _In_reads_(cbKey) PCBYTE pbKey, + SIZE_T cbKey ) +{ + return SymCryptRc2ExpandKeyEx( pExpandedKey, pbKey, cbKey, (UINT32) (8*cbKey) ); +} + + +#define MIXROUND( n ) {\ + R0 = R0 + K[4*n] + (R3 & R2) + (~R3 & R1); \ + R0 = ROL16( R0, 1 ); \ + R1 = R1 + K[4*n+1] + (R0 & R3) + (~R0 & R2 ); \ + R1 = ROL16( R1, 2 ); \ + R2 = R2 + K[4*n+2] + (R1 & R0) + (~R1 & R3); \ + R2 = ROL16( R2, 3 ); \ + R3 = R3 + K[4*n+3] + (R2 & R1) + (~R2 & R0 ); \ + R3 = ROL16( R3, 5 ); \ + } + +#define MASHROUND() { \ + R0 = R0 + K[R3 & 63]; \ + R1 = R1 + K[R0 & 63]; \ + R2 = R2 + K[R1 & 63]; \ + R3 = R3 + K[R2 & 63]; \ + } + +VOID +SYMCRYPT_CALL +SymCryptRc2Encrypt( + _In_ PCSYMCRYPT_RC2_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_RC2_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_RC2_BLOCK_SIZE ) PBYTE pbDst ) +{ + UINT16 R0, R1, R2, R3; + PCUINT16 K; + + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + // + // 1. Initialize words R[0], ..., R[3] to contain the 64-bit plaintext value. + // + R0 = SYMCRYPT_LOAD_LSBFIRST16( &pbSrc[0] ); + R1 = SYMCRYPT_LOAD_LSBFIRST16( &pbSrc[2] ); + R2 = SYMCRYPT_LOAD_LSBFIRST16( &pbSrc[4] ); + R3 = SYMCRYPT_LOAD_LSBFIRST16( &pbSrc[6] ); + + // + // 2. Expand the key, so that words K[0], ..., K[63] become defined. + // (In our case the key was previously expanded, so we just grab the pointer to it.) + // + K = pExpandedKey->K; + + // + // 3. Initialize j to zero. + // + + // + // 4. Perform five mixing rounds. + // + MIXROUND(0); + MIXROUND(1); + MIXROUND(2); + MIXROUND(3); + MIXROUND(4); + + // + // 5. Perform one mashing round. + // + MASHROUND(); + + // + // 6. Perform six mixing rounds. 
+ // + MIXROUND(5); + MIXROUND(6); + MIXROUND(7); + MIXROUND(8); + MIXROUND(9); + MIXROUND(10); + + // + // 7. Perform one mashing round. + // + MASHROUND(); + + // + // 8. Perform five mixing rounds. + // + MIXROUND(11); + MIXROUND(12); + MIXROUND(13); + MIXROUND(14); + MIXROUND(15); + + SYMCRYPT_STORE_LSBFIRST16( &pbDst[0], R0 ); + SYMCRYPT_STORE_LSBFIRST16( &pbDst[2], R1 ); + SYMCRYPT_STORE_LSBFIRST16( &pbDst[4], R2 ); + SYMCRYPT_STORE_LSBFIRST16( &pbDst[6], R3 ); + +} + + +#define RMIXROUND( n ) {\ + R3 = ROR16( R3, 5 ); \ + R3 = R3 - K[4*n+3] - (R2 & R1) - (~R2 & R0 ); \ + R2 = ROR16( R2, 3 ); \ + R2 = R2 - K[4*n+2] - (R1 & R0) - (~R1 & R3); \ + R1 = ROR16( R1, 2 ); \ + R1 = R1 - K[4*n+1] - (R0 & R3) - (~R0 & R2 ); \ + R0 = ROR16( R0, 1 ); \ + R0 = R0 - K[4*n ] - (R3 & R2) - (~R3 & R1); \ + } + +#define RMASHROUND() { \ + R3 = R3 - K[R2 & 63]; \ + R2 = R2 - K[R1 & 63]; \ + R1 = R1 - K[R0 & 63]; \ + R0 = R0 - K[R3 & 63]; \ + } + + + +VOID +SYMCRYPT_CALL +SymCryptRc2Decrypt( + _In_ PCSYMCRYPT_RC2_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_RC2_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_RC2_BLOCK_SIZE ) PBYTE pbDst ) +{ + UINT16 R0, R1, R2, R3; + PCUINT16 K; + + SYMCRYPT_CHECK_MAGIC( pExpandedKey ); + + // + // 1. Initialize words R[0], ..., R[3] to contain the 64-bit ciphertext value. + // + R0 = SYMCRYPT_LOAD_LSBFIRST16( &pbSrc[0] ); + R1 = SYMCRYPT_LOAD_LSBFIRST16( &pbSrc[2] ); + R2 = SYMCRYPT_LOAD_LSBFIRST16( &pbSrc[4] ); + R3 = SYMCRYPT_LOAD_LSBFIRST16( &pbSrc[6] ); + + // + // 2. Expand the key, so that words K[0], ..., K[63] become defined. + // (In our case the key was previously expanded, so we just grab the pointer to it.) + // + K = pExpandedKey->K; + + // + // 3. Initialize j to 63. + // + + // + // 4. Perform five r-mixing rounds. + // + RMIXROUND(15); + RMIXROUND(14); + RMIXROUND(13); + RMIXROUND(12); + RMIXROUND(11); + + // + // 5. Perform one r-mashing round. + // + RMASHROUND(); + + // + // 6. Perform six r-mixing rounds. 
+ // + RMIXROUND(10); + RMIXROUND(9); + RMIXROUND(8); + RMIXROUND(7); + RMIXROUND(6); + RMIXROUND(5); + + // + // 7. Perform one r-mashing round. + // + RMASHROUND(); + + // + // 8. Perform five r-mixing rounds. + // + RMIXROUND(4); + RMIXROUND(3); + RMIXROUND(2); + RMIXROUND(1); + RMIXROUND(0); + + SYMCRYPT_STORE_LSBFIRST16( &pbDst[0], R0 ); + SYMCRYPT_STORE_LSBFIRST16( &pbDst[2], R1 ); + SYMCRYPT_STORE_LSBFIRST16( &pbDst[4], R2 ); + SYMCRYPT_STORE_LSBFIRST16( &pbDst[6], R3 ); + +} + +static const BYTE testPlaintext[8] = { 'P', 'l', 'a', 'i', 'n', 't', 'x', 't', }; +static const BYTE testCiphertext[8] = { + 0x89, 0xe8, 0x5d, 0x1a, 0x98, 0xcd, 0xe5, 0x52, +}; + +VOID +SYMCRYPT_CALL +SymCryptRc2Selftest(void) +{ + SYMCRYPT_RC2_EXPANDED_KEY key; + BYTE buf[SYMCRYPT_RC2_BLOCK_SIZE]; + + if( SymCryptRc2ExpandKeyEx( &key, SymCryptTestKey32, 16, 87) != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'rc21' ); + } + + SymCryptRc2Encrypt( &key, testPlaintext, buf ); + + SymCryptInjectError( buf, SYMCRYPT_RC2_BLOCK_SIZE ); + + if( memcmp( buf, testCiphertext, SYMCRYPT_RC2_BLOCK_SIZE ) != 0 ) + { + SymCryptFatal( 'rc22' ); + } + + SymCryptRc2Decrypt( &key, testCiphertext, buf ); + + SymCryptInjectError( buf, SYMCRYPT_RC2_BLOCK_SIZE ); + + if( memcmp( buf, testPlaintext, SYMCRYPT_RC2_BLOCK_SIZE ) != 0 ) + { + SymCryptFatal( 'rc23' ); + } + +} diff --git a/libs/symcrypt/lib/rc4.c b/libs/symcrypt/lib/rc4.c new file mode 100644 index 00000000000..914771c5aa5 --- /dev/null +++ b/libs/symcrypt/lib/rc4.c @@ -0,0 +1,156 @@ +// +// Rc4.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// This is a new implementation, NOT based on the existing ones in RSA32.lib. +// The algorithm specification is taken from "ARCFOUR Algorithm" internet +// draft dated July 1999, and from memory. 
+// + +#include "precomp.h" + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRc4Init( + _Out_ PSYMCRYPT_RC4_STATE pState, + _In_reads_( cbKey ) PCBYTE pbKey, + _In_ SIZE_T cbKey ) +{ + SIZE_T i; + SIZE_T j; + BYTE keyBuf[256]; + SIZE_T keyIdx; + + SYMCRYPT_RC4_S_TYPE T; + + if( cbKey > 256 || cbKey == 0 ) + { + return SYMCRYPT_WRONG_KEY_SIZE; + } + + // + // Make a copy of the key to obey the read-once rule. + // This is a case where it looks safe to break the read-once + // rule, but it isn't. RC4 with very long keys (e.g. 256 bytes) + // is actually very vulnerable against related-key attacks. + // One obvious precaution is to limit the length of the RC4 key, + // which one of the layers above us might do. + // Allowing the key bytes to change as we read them negates + // this countermeasure. + // + memcpy( keyBuf, pbKey, cbKey ); + + for( i=0; i<256; i++ ) + { + pState->S[i] = (SYMCRYPT_RC4_S_TYPE) i; + } + + j = 0; + keyIdx = 0; + for( i=0; i<256; i++ ) + { + + T = pState->S[i]; + j = (j + T + keyBuf[keyIdx]) & 0xff; + pState->S[i] = pState->S[j]; + pState->S[j] = T; + keyIdx++; + if( keyIdx == cbKey ) + { + keyIdx = 0; + } + } + + // + // We store the i value already incremented for the next byte. + // This seems to allow better instruction sequencing interleaving in the actual en/decrypt loop + // + pState->i = 1; + pState->j = 0; + + SYMCRYPT_SET_MAGIC( pState ); + + SymCryptWipe( keyBuf, cbKey ); + + return SYMCRYPT_NO_ERROR; +} + + +VOID +SYMCRYPT_CALL +SymCryptRc4Crypt( + _Inout_ PSYMCRYPT_RC4_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + _In_ SIZE_T cbData ) +{ + SIZE_T i; + SIZE_T j; + SYMCRYPT_RC4_S_TYPE Ti; + SYMCRYPT_RC4_S_TYPE Tj; + PCBYTE pbSrcEnd = pbSrc + cbData; + + SYMCRYPT_CHECK_MAGIC( pState ); + + i = pState->i; + j = pState->j; + + // + // I tried to unroll this loop 4x and use a single 32-bit operation to XOR the key + // stream with the data. This actually makes the code slower by 1 c/B on a Core 2. 
+ // I suspect that that is because the instruction decoders are the bottleneck, and + // a small loop can be run out of the uop queue which bypasses the instruction decoders. + // A larger loop has to be decoded every time, and that slows things down. + // The theoretical gain of unrolling the loop is less than 1 c/B, + // and as Core 2 and derived CPUs are the most commonly used CPUs by our customers, + // it is not worthwhile to pursue this further. + // + // - Niels Ferguson (niels) 2010-10-11 + // + + while( pbSrc < pbSrcEnd ) + { + // + // Our i value is already incremented + // + Ti = pState->S[i]; + j = (j + Ti ) & 0xff; + Tj = pState->S[j]; + pState->S[i] = Tj; + pState->S[j] = Ti; + *pbDst = (BYTE) (*pbSrc ^ pState->S[(Ti + Tj) & 0xff]); + + i = (i + 1) & 0xff; + + pbSrc++; + pbDst++; + } + + pState->i = (BYTE) i; + pState->j = (BYTE) j; +} + + +static const BYTE rc4KatAnswer[ 3 ] = { 0x71, 0x46, 0x92 }; + + +VOID +SYMCRYPT_CALL +SymCryptRc4Selftest(void) +{ + BYTE buf[3]; + SYMCRYPT_RC4_STATE state; + + SymCryptRc4Init( &state, SymCryptTestKey32, sizeof( SymCryptTestKey32 ) ); + + SymCryptRc4Crypt( &state, SymCryptTestMsg3, buf, sizeof( buf ) ); + + SymCryptInjectError( buf, sizeof( buf ) ); + + if( memcmp( buf, rc4KatAnswer, sizeof( buf )) != 0 ) + { + SymCryptFatal( 'rc4 ' ); + } + +} diff --git a/libs/symcrypt/lib/rdrand.c b/libs/symcrypt/lib/rdrand.c new file mode 100644 index 00000000000..79e8d951e3a --- /dev/null +++ b/libs/symcrypt/lib/rdrand.c @@ -0,0 +1,172 @@ +// +// rdrand.c Support for RdRand instruction +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#include "precomp.h" + +#if (SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64) // only available on x86 and amd64 architectures + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("rdrnd"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("rdrnd") +#endif + +#if SYMCRYPT_MS_VC && _MSC_VER < 1610 +#error MSVC version lacks support for RDRAND intrinsics. Compile for the generic environment instead. +#endif + +// +// TODO: the _rdrand_u*() versions of the intrinsics can be removed once the new compiler +// with the _rdrand*_step() intrinsics is used in all branches + +#if SYMCRYPT_MS_VC && _MSC_VER < 1700 // 1700 = Dev11, + +// +// This is the code that uses the old intrinsics in the compiler version 16.1 +// + +unsigned int _rdrand_u32(void); +unsigned __int64 _rdrand_u64(void); + + +#if SYMCRYPT_CPU_X86 +#define SymCryptRdrandSizet(p) ( *(p)=(SIZE_T)_rdrand_u32(), SYMCRYPT_NO_ERROR ) +#else +#define SymCryptRdrandSizet(p) ( *(p)=(SIZE_T)_rdrand_u64(), SYMCRYPT_NO_ERROR ) +#endif + +#else // _MSC_VER + +// +// Code for the new Dev11 intrinsics +// + +#if SYMCRYPT_CPU_X86 +#define _rdrandxx_step(_p) _rdrand32_step( (unsigned int *) (_p) ) +#else +#define _rdrandxx_step(_p) _rdrand64_step( (UINT64 *) (_p) ) +#endif + +FORCEINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdrandSizet( SIZE_T * p ) +{ + int i; + + // + // In Win8/WinBlue we iterated 1000 times. + // But we got a crash bucket where we fail because of + // not getting any random data. + // I contacted the Intel people; according to them they cannot make the + // RDRAND instruction fail more than a dozen times in a row under any tested + // circumstance. They have no idea how it could fail 1000 times in a row. + // As a failure of this code leads to a bugcheck (it fails a security promise, and + // is therefore treated as a critical security bug) I have increased the + // iteration count to 1000000. 
+ // This will not affect any machine that didn't bugcheck before, but it hopefully + // will remove some of the current bugchecks. + // + // Niels Ferguson (niels) 2014-04-09. + // + for( i=0; i<1000000; i++ ) + { + if( _rdrandxx_step( p ) != 0 ) + { + return SYMCRYPT_NO_ERROR; + } + } + return SYMCRYPT_HARDWARE_FAILURE; +} + +#endif + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdrandStatus(void) +{ + // + // Check that the library is initialized; otherwise the CPUID info + // is all zeroes. (This check only happens in CHKed builds.) + // + SymCryptCheckLibraryInitialized(); + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_RDRAND ) ) + { + return SYMCRYPT_NO_ERROR; + } + else + { + return SYMCRYPT_NOT_IMPLEMENTED; + } +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdrandGetBytes( + _Out_writes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _Out_writes_( SYMCRYPT_SHA512_RESULT_SIZE ) PBYTE pbResult ) +{ + SIZE_T * pBuf; + SIZE_T nBuf; + SIZE_T i; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + // + // Take care of the obvious errors that can happen + // + if( SymCryptRdrandStatus() != SYMCRYPT_NO_ERROR || + (cbBuffer & 0xf) != 0 + ) + { + SymCryptFatal( 'rdrn' ); + } + + pBuf = (SIZE_T *) pbBuffer; + nBuf = cbBuffer / sizeof( SIZE_T ); + + for( i=0; i<nBuf; i++ ) + { + scError = SymCryptRdrandSizet( &pBuf[i] ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + SymCryptSha512( pbBuffer, cbBuffer, pbResult ); + +cleanup: + SymCryptWipe( pbBuffer, cbBuffer ); + + return scError; +} + + +VOID +SYMCRYPT_CALL +SymCryptRdrandGet( + _Out_writes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _Out_writes_( SYMCRYPT_SHA512_RESULT_SIZE ) PBYTE pbResult ) +{ + if( SymCryptRdrandGetBytes( pbBuffer, cbBuffer, pbResult ) != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'rdrx' ); + } +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 diff --git 
a/libs/symcrypt/lib/rdseed.c b/libs/symcrypt/lib/rdseed.c new file mode 100644 index 00000000000..5e1f94f7809 --- /dev/null +++ b/libs/symcrypt/lib/rdseed.c @@ -0,0 +1,135 @@ +// +// rdseed.c Support for RdSeed instruction +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#if (SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64) // only available on x86 and amd64 architectures. + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("rdseed"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("rdseed") +#endif + +#if SYMCRYPT_MS_VC && _MSC_VER < 1610 +#error MSVC version lacks support for RDSEED intrinsics. Compile for the generic environment instead. +#endif + +// +// Create a definition that works on SIZE_Ts. +// + +#if SYMCRYPT_CPU_X86 +#define _rdseedxx_step(_p) _rdseed32_step( (unsigned int *) (_p) ) +#else +#define _rdseedxx_step(_p) _rdseed64_step( (UINT64 *) (_p) ) +#endif + +FORCEINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdseedSizet( SIZE_T * p ) +{ + int i; + + // + // There is no way to report errors, and customers rely on the RNG to work properly. + // Therefore, higher layers will fatal if this function fails. + // This is why we have a very high retry count; the alternative is to fatal. + // + // + for( i=0; i<10000000; i++ ) + { + if( _rdseedxx_step( p ) != 0 ) + { + return SYMCRYPT_NO_ERROR; + } + } + return SYMCRYPT_HARDWARE_FAILURE; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdseedStatus(void) +{ + // + // Check that the library is initialized; otherwise the CPUID info + // is all zeroes. (This check only happens in CHKed builds.) 
+ // + SymCryptCheckLibraryInitialized(); + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_RDSEED ) ) + { + return SYMCRYPT_NO_ERROR; + } + else + { + return SYMCRYPT_NOT_IMPLEMENTED; + } +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRdseedGetBytes( + _Out_writes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult ) +{ + SIZE_T * pBuf; + SIZE_T nBuf; + SIZE_T i; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + // + // Take care of the obvious errors that can happen + // + if( SymCryptRdseedStatus() != SYMCRYPT_NO_ERROR || + (cbResult & 0xf) != 0 + ) + { + SymCryptFatal( 'rdsd' ); + } + + pBuf = (SIZE_T *) pbResult; + nBuf = cbResult / sizeof( SIZE_T ); + + for( i=0; i<nBuf; i++ ) + { + scError = SymCryptRdseedSizet( &pBuf[i] ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + +cleanup: + + return scError; +} + + +VOID +SYMCRYPT_CALL +SymCryptRdseedGet( + _Out_writes_( cbResult ) PBYTE pbResult, + SIZE_T cbResult ) +{ + if( SymCryptRdseedGetBytes( pbResult, cbResult ) != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'rdsx' ); + } +} + + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 diff --git a/libs/symcrypt/lib/recoding.c b/libs/symcrypt/lib/recoding.c new file mode 100644 index 00000000000..4341c5c8fff --- /dev/null +++ b/libs/symcrypt/lib/recoding.c @@ -0,0 +1,209 @@ +// +// recoding.c Algorithms for recoding the factors / exponents in various implementations +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// +// + +#include "precomp.h" + +// +// The following is an adaptation of algorithm 6: "Protected +// odd-only recoding algorithm for the fixed-window representation" +// from the paper +// "Selecting Elliptic Curves for Cryptography: An Efficiency and +// Security Analysis" by Bos, Costello, Longa, and Naehrig +// +// Input: odd integer k \in [1,GOrd], window width w>=2, and +// t = ceil( GOrdBitsize / w-1 ) +// +// Output: (k_t, ... , k_0) where k_i \in {+-1, +-3, ..., +-(2^(w-1) -1)} +// +// Algorithm: +// for i=0 to (t-1) do +// k_i = (k mod 2^w) - 2^(w-1) +// k = (k-k_i)/2^(w-1) +// k_t = k mod 2^(w-1) +// return (k_t, ..., k_0) +// +// Remarks: +// 1. An invariant of the main loop is that (k > 0 and k odd). This means +// that all k_i's are odd and that k_t > 0. +// 2. We will store the values of k_i's as absolute values and signs in +// absofKIs and sigofKIs arrays, resp. The sigofKIs[i] is 0xffffffff if +// k_i < 0, otherwise it is 0. +// 3. In the multiplication algorithm we always access the precomputed point +// P[(|k_i|-1)/2]. Therefore here we just shift the |k_i| value right by +// one bit before storing it in absofKIs table. +// 4. Caller should check k in range [1,GOrd] to ensure use of recoding will +// give correct results. This algorithm always recodes the t * (w-1) least +// significant bits of the provided k, interpreted as an unsigned integer. +// +VOID +SYMCRYPT_CALL +SymCryptFixedWindowRecoding( + UINT32 W, + _Inout_ PSYMCRYPT_INT piK, + _Inout_ PSYMCRYPT_INT piTmp, + _Out_writes_( nRecodedDigits ) + PUINT32 absofKIs, + _Out_writes_( nRecodedDigits ) + PUINT32 sigofKIs, + UINT32 nRecodedDigits ) +{ + UINT32 T1 = 0; + UINT32 T2 = 0; + UINT32 mask = ~(0xffffffff << W); // Window mask = 2^w - 1 (e.g. 0x0000003f for w = 6) + UINT32 smask = 0x1 << (W-1); // Sign mask = 2^(w-1) (e.g. 
0x00000020 for w = 6) + + SYMCRYPT_ASSERT( W < 32 ); + + for (UINT32 i=0; i < nRecodedDigits - 1; i++) + { + T1 = SymCryptIntGetValueLsbits32( piK ) & mask; // T1 = k mod 2^W + + // At this point if the w-th bit of T1 is 1 then we know that T1 > 2^(w-1) + // (Since k = odd is a loop invariant). + // + // In this case, (case A), T1 & ~smask is equal to (k mod 2^w) - 2^(w-1) = k_i = |k_i|. + // + // Otherwise, (case B), we know that T1 < 2^(w-1). Therefore 2^(w-1) - T1 = |k_i|. + + sigofKIs[i] = SYMCRYPT_MASK32_ZERO( T1 & smask ); // If the sign of k_i is - this mask is set to 0xffffffff. (Case B) + + T2 = T1 & ~smask; // |k_i| in case A + T1 = smask - T1; // |k_i| in case B + + absofKIs[i] = ((T1 & sigofKIs[i]) | (T2 & ~sigofKIs[i])) >> 1; // Setting (masked) the absolute value of k_i in absofKIs (divided by 2) + + SymCryptIntSubUint32( piK, T2, piTmp ); // This gives k - k_i in case (A) + SymCryptIntAddUint32( piK, T1, piK ); // This gives k - k_i in case (B) + + SymCryptIntMaskedCopy( piTmp, piK, ~sigofKIs[i] ); // Copy the result to piK in case (B) + + SymCryptIntDivPow2( piK, W-1, piK ); // k := k / 2^(w-1) + } + + // The last sign is positive given k < GOrd => k_t < 2^w + sigofKIs[nRecodedDigits - 1] = 0; + // Belts and braces, select only the bottom w-1 bits (ensure all absofKIs represent odd values in range [1,2^(w-1)-1]) + absofKIs[nRecodedDigits - 1] = (SymCryptIntGetValueLsbits32( piK ) & mask & ~smask) >> 1; +} + +// +// The following is an algorithm for computing the width-w NAF of a positive integer. +// +// Input: integer k \in [1,GOrd), window width w>=2, and nRecodedDigits = GOrdBitsize + 1 +// +// Output: (k_(nRecodedDigits-1), ... 
, k_0) where k_i \in {0, +-1, +-3, ..., +-(2^(w-1) -1)} +// +// Algorithm: +// for i = 0 to (nRecodedDigits-1) +// if (k is odd) +// k_i = (k mods 2^w) +// k = k - k_i +// else +// k_i = 0 +// k = k/2 +// return (k_(nRecodedDigits-1), ..., k_0) +// +// Note: k mods 2^w is the integer u with (u == k mod 2^w) and (-2^(w-1) <= u < 2^(w-1) ). +// +// Remarks: +// 1. The above algorithm and the implementation are NOT SIDE-CHANNEL SAFE. +// Therefore, it should only be used when the SYMCRYPT_FLAG_DATA_PUBLIC is +// specified. +// 2. The multiplication algorithm uses |k_i|/2 as indexes. Therefore we will shift left +// the absolute value of k_i by 1 bit and store only |k_i|/2. +// 3. Since now the k_i's can be zero we will store the following in sigofKIs: +// sigofKIs[i] = 0x00000001 if k_i > 0 +// sigofKIs[i] = 0x00000000 if k_i = 0 +// sigofKIs[i] = 0xffffffff if k_i < 0 +// +VOID +SYMCRYPT_CALL +SymCryptWidthNafRecoding( + UINT32 W, + _Inout_ PSYMCRYPT_INT piK, + _Out_writes_( nRecodedDigits ) + PUINT32 absofKIs, + _Out_writes_( nRecodedDigits ) + PUINT32 sigofKIs, + UINT32 nRecodedDigits ) +{ + UINT32 T1 = 0; + UINT32 mask = ~(0xffffffff << W); // Window mask = 2^w - 1 (e.g. 0x0000003f for w = 6) + UINT32 modulus = mask + 1; // 2^w + UINT32 smask = 0x1 << (W-1); // Sign mask = 2^(w-1) (e.g. 
0x00000020 for w = 6) + + SYMCRYPT_ASSERT( W < 32 ); + + for (UINT32 i=0; i < nRecodedDigits; i++) + { + T1 = SymCryptIntGetValueLsbits32( piK ) & mask; // T1 = k mod 2^W + + if (T1 & 0x1) + { + if (T1 > smask) + { + sigofKIs[i] = 0xffffffff; + absofKIs[i] = modulus - T1; // 2^W - T1 = |T1 - 2^W| + SymCryptIntAddUint32( piK, absofKIs[i], piK ); // k-k_i + } + else + { + // Here (k mod 2^W) is already in the specified range + sigofKIs[i] = 0x00000001; + absofKIs[i] = T1; + SymCryptIntSubUint32( piK, absofKIs[i], piK ); // k-k_i + } + } + else + { + absofKIs[i] = 0; + sigofKIs[i] = 0; + } + + SymCryptIntDivPow2( piK, 1, piK ); // k := k / 2 + } +} + +// +// The following is an algorithm similar to the above +// but the output is only non-negative (odd) digits. +// +// Requirements: +// nRecodedDigits == nBitsExp +// +VOID +SYMCRYPT_CALL +SymCryptPositiveWidthNafRecoding( + UINT32 W, + _In_ PCSYMCRYPT_INT piK, + UINT32 nBitsExp, + _Out_writes_( nRecodedDigits ) + PUINT32 absofKIs, + UINT32 nRecodedDigits ) +{ + UINT32 T1 = 0; + UINT32 cntrZ = W; // Counter that specifies when we filled the last non-zero NAF digit + + SYMCRYPT_ASSERT( nRecodedDigits <= SymCryptIntBitsizeOfObject( piK ) ); + + for (UINT32 i=0; i < nRecodedDigits; i++) + { + T1 = SymCryptIntGetBits( piK, i, SYMCRYPT_MIN(W, nBitsExp-i) ); // Get a batch of W bits (but don't go over nBitsExp) + + if ((cntrZ>=W) && ((T1 & 0x01) > 0)) // Only store odd digits + { + absofKIs[i] = T1; + cntrZ = 0; + } + else + { + absofKIs[i] = 0; + } + + cntrZ++; // Prepare the counter for the next iteration + } +} diff --git a/libs/symcrypt/lib/rsa_enc.c b/libs/symcrypt/lib/rsa_enc.c new file mode 100644 index 00000000000..a76e663d923 --- /dev/null +++ b/libs/symcrypt/lib/rsa_enc.c @@ -0,0 +1,1531 @@ +// +// rsa_enc.c RSA related algorithms +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#include "precomp.h" + +// +// Helper functions for RSA raw encrypt/decrypt (they do NOT allocate scratch space) +// + +UINT32 +SYMCRYPT_CALL +SymCryptRsaCoreEncScratchSpace( _In_ PCSYMCRYPT_RSAKEY pkRsakey) +{ + // Bounded by 2^19 + 2^24 ~ 2^24 (see symcrypt_internal.h) + return SymCryptSizeofModElementFromModulus( pkRsakey->pmModulus ) + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pkRsakey->nDigitsOfModulus ), + SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( pkRsakey->nDigitsOfModulus ) ); +} + +SYMCRYPT_ERROR +SymCryptRsaCoreVerifyInput( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT numFormat, + SIZE_T cbDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PSYMCRYPT_INT piTmpInteger = NULL; + UINT32 cbTmpInteger = 0; + + UNREFERENCED_PARAMETER( cbScratch ); + + if ( cbSrc > SymCryptRsakeySizeofModulus(pkRsakey) || + cbDst < SymCryptRsakeySizeofModulus(pkRsakey) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // It is an error of value(pbSrc) >= modulus + // We already know that cbSrc <= sizeof( modulus ) so we only have to run this check + // if cbSrc == sizeof( modulus ) + // No side channel issues here: we are only comparing the input to the public part of the key. 
+ if (cbSrc == SymCryptRsakeySizeofModulus(pkRsakey)) + { + cbTmpInteger = SymCryptSizeofIntFromDigits( pkRsakey->nDigitsOfModulus ); + SYMCRYPT_ASSERT( cbScratch >= cbTmpInteger ); + piTmpInteger = SymCryptIntCreate( pbScratch, cbTmpInteger, pkRsakey->nDigitsOfModulus ); + + scError = SymCryptIntSetValue( pbSrc, cbSrc, numFormat, piTmpInteger ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + if (!SymCryptIntIsLessThan(piTmpInteger, SymCryptIntFromModulus(pkRsakey->pmModulus))) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + +cleanup: + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaCoreEnc( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PSYMCRYPT_MODELEMENT peRes = NULL; + UINT32 cbModElement = 0; + + PBYTE pbFnScratch = NULL; + SIZE_T cbFnScratch = 0; + + BYTE abExpIntBuffer[ SYMCRYPT_SIZEOF_INT_FROM_BITS( 64 ) + SYMCRYPT_ASYM_ALIGN_VALUE]; + PSYMCRYPT_INT piExp = NULL; + + UNREFERENCED_PARAMETER( flags ); + + scError = SymCryptRsaCoreVerifyInput(pkRsakey, pbSrc, cbSrc, numFormat, cbDst, pbScratch, cbScratch); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + cbModElement = SymCryptSizeofModElementFromModulus( pkRsakey->pmModulus ); + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= cbModElement + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pkRsakey->nDigitsOfModulus ), + SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( pkRsakey->nDigitsOfModulus ) )); + + pbFnScratch = pbScratch; + cbFnScratch = cbScratch; + + peRes = SymCryptModElementCreate( pbScratch, cbModElement, pkRsakey->pmModulus ); + SYMCRYPT_ASSERT( peRes != NULL ); + pbFnScratch += cbModElement; + cbFnScratch -= cbModElement; + + 
// Set the original value
+    scError = SymCryptModElementSetValue( pbSrc, cbSrc, numFormat, pkRsakey->pmModulus, peRes, pbFnScratch, cbFnScratch );
+    if (scError!=SYMCRYPT_NO_ERROR)
+    {
+        goto cleanup;
+    }
+
+    // Convert the public exponent to an Int
+    // Future: we can optimize the ModExp to take an UINT64
+    piExp = SymCryptIntCreate( SYMCRYPT_ASYM_ALIGN_UP(abExpIntBuffer), sizeof( abExpIntBuffer) - SYMCRYPT_ASYM_ALIGN_VALUE, 1 );
+    if( piExp == NULL )
+    {
+        scError = SYMCRYPT_HARDWARE_FAILURE;
+        goto cleanup;
+    }
+    SymCryptIntSetValueUint64( pkRsakey->au64PubExp[0], piExp );
+
+    // Modular Exponentiation
+    SymCryptModExp(
+            pkRsakey->pmModulus,
+            peRes,
+            piExp,
+            SymCryptIntBitsizeOfValue( piExp ),     // This is a public value
+            SYMCRYPT_FLAG_DATA_PUBLIC,
+            peRes,
+            pbFnScratch,
+            cbFnScratch );
+
+    // Output the value
+    scError = SymCryptModElementGetValue( pkRsakey->pmModulus, peRes, pbDst, cbDst, numFormat, pbFnScratch, cbFnScratch );
+    if (scError!=SYMCRYPT_NO_ERROR)
+    {
+        goto cleanup;
+    }
+
+cleanup:
+
+    if( piExp != NULL )
+    {
+        SymCryptIntWipe( piExp );
+    }
+
+    return scError;
+}
+
+//
+// SymCryptRsaCoreDecCrtScratchSpace
+//
+// Returns the number of scratch bytes that SymCryptRsaCoreDecCrt requires for
+// the given key. The total is the sum of:
+//  - three integers sized for the modulus,
+//  - one integer sized for the largest prime,
+//  - one mod element per prime,
+//  - one mod element for the full modulus,
+//  - the largest scratch requirement among the common mod operations, modexp,
+//    int div-mod and CRT-solve helpers.
+//
+UINT32
+SYMCRYPT_CALL
+SymCryptRsaCoreDecCrtScratchSpace( _In_ PCSYMCRYPT_RSAKEY pkRsakey)
+{
+    UINT32 cbModElementTotal = 0;
+    UINT32 nPrimes = pkRsakey->nPrimes;
+
+    SYMCRYPT_ASSERT( nPrimes <= SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES );
+    // clamp nPrimes to SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES for scratch memory allocation purposes
+    // SymCryptRsaCoreDecCrt will fail with invalid argument if there are too many primes later
+    nPrimes = SYMCRYPT_MIN( nPrimes, SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES );
+
+    // Iterate over the clamped count, not pkRsakey->nPrimes: when the assert
+    // above is compiled out, a key with too many primes must not index past
+    // the per-prime array (presumably sized SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES
+    // -- confirm against the SYMCRYPT_RSAKEY declaration).
+    for (UINT32 i=0; i<nPrimes; i++)
+    {
+        cbModElementTotal += SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( pkRsakey->nBitsOfPrimes[i]);
+    }
+
+    // Bounded by 5*2^19 + 2^24 ~ 2^24 (see symcrypt_internal.h)
+    return 3*SymCryptSizeofIntFromDigits( pkRsakey->nDigitsOfModulus ) +
+           SymCryptSizeofIntFromDigits( pkRsakey->nMaxDigitsOfPrimes ) +
+           cbModElementTotal +
+           SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( pkRsakey->nBitsOfModulus) +
+           SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pkRsakey->nDigitsOfModulus ),
+                SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( pkRsakey->nDigitsOfModulus ),
+                SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( pkRsakey->nDigitsOfModulus, pkRsakey->nMaxDigitsOfPrimes ),
+                     SYMCRYPT_SCRATCH_BYTES_FOR_CRT_SOLUTION( pkRsakey->nMaxDigitsOfPrimes ) )));
+}
+
+UINT32
+SYMCRYPT_CALL
+SymCryptRsaCoreDecScratchSpace( _In_ PCSYMCRYPT_RSAKEY pkRsakey)
+{
+    // Bounded by 2^19 + 2^24 ~ 2^24 (see symcrypt_internal.h)
+    return SymCryptSizeofModElementFromModulus( pkRsakey->pmModulus ) +
+           SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pkRsakey->nDigitsOfModulus ),
+                SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( pkRsakey->nDigitsOfModulus ) );
+}
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptRsaCoreDecCrt(
+    _In_                        PCSYMCRYPT_RSAKEY       pkRsakey,
+    _In_reads_bytes_( cbSrc )   PCBYTE                  pbSrc,
+                                SIZE_T                  cbSrc,
+                                SYMCRYPT_NUMBER_FORMAT  numFormat,
+                                UINT32                  flags,
+    _Out_writes_( cbDst )       PBYTE                   pbDst,
+                                SIZE_T                  cbDst,
+    _Out_writes_bytes_( cbScratch )
+                                PBYTE                   pbScratch,
+                                SIZE_T                  cbScratch )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    PSYMCRYPT_INT piCiphertext = NULL;
+    PSYMCRYPT_INT piPlaintext = NULL;
+    UINT32 cbInt = 0;
+
+    PSYMCRYPT_INT piTmp = NULL;
+    UINT32 cbTmp = 0;
+
+    PSYMCRYPT_MODELEMENT peCrtElements[SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES] = { 0 };
+    UINT32 cbModElements[SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES] = { 0 };
+    UINT32 cbModElementTotal = 0;
+
+    // Used to verify decryption
+    PSYMCRYPT_INT piVerify = NULL; // Size equal to cbInt
+    PSYMCRYPT_MODELEMENT peVerify = NULL;
+    UINT32 cbModElementVerify = 0;
+
+    PBYTE pbFnScratch = NULL;
+    SIZE_T cbFnScratch = 0;
+
+    UNREFERENCED_PARAMETER( flags );
+
+    // Make sure that the key has a private key
+    if (!pkRsakey->hasPrivateKey)
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    scError = SymCryptRsaCoreVerifyInput(pkRsakey, pbSrc,
cbSrc, numFormat, cbDst, pbScratch, cbScratch); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Verify that the number of primes does not cause a stack overflow + if (pkRsakey->nPrimes > SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbInt = SymCryptSizeofIntFromDigits( pkRsakey->nDigitsOfModulus ); + cbTmp = SymCryptSizeofIntFromDigits( pkRsakey->nMaxDigitsOfPrimes ); + for (UINT32 i=0; i<pkRsakey->nPrimes; i++) + { + cbModElements[i] = SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( pkRsakey->nBitsOfPrimes[i]); + cbModElementTotal += cbModElements[i]; + } + + cbModElementVerify = SymCryptSizeofModElementFromModulus( pkRsakey->pmModulus ); + + UNREFERENCED_PARAMETER( cbScratch ); + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // - nPrimes is at most SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES = 2 + // Thus the following calculation does not overflow cbScratch. 
+ // + SYMCRYPT_ASSERT( cbScratch >= + 3*cbInt + cbTmp + cbModElementTotal + cbModElementVerify + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pkRsakey->nDigitsOfModulus ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( pkRsakey->nDigitsOfModulus ), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( pkRsakey->nDigitsOfModulus, pkRsakey->nMaxDigitsOfPrimes ), + SYMCRYPT_SCRATCH_BYTES_FOR_CRT_SOLUTION( pkRsakey->nMaxDigitsOfPrimes ) ))) ); + + pbFnScratch = pbScratch; + cbFnScratch = cbScratch; + + piPlaintext = SymCryptIntCreate( pbFnScratch, cbFnScratch, pkRsakey->nDigitsOfModulus ); + SYMCRYPT_ASSERT( piPlaintext != NULL ); + pbFnScratch += cbInt; + cbFnScratch -= cbInt; + + piCiphertext = SymCryptIntCreate( pbFnScratch, cbFnScratch, pkRsakey->nDigitsOfModulus ); + SYMCRYPT_ASSERT( piCiphertext != NULL ); + pbFnScratch += cbInt; + cbFnScratch -= cbInt; + + piTmp = SymCryptIntCreate( pbFnScratch, cbFnScratch, pkRsakey->nMaxDigitsOfPrimes ); + SYMCRYPT_ASSERT( piTmp != NULL ); + pbFnScratch += cbTmp; + cbFnScratch -= cbTmp; + + SYMCRYPT_ASSERT( pkRsakey->nPrimes <= SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES ); + for (UINT32 i=0; i<pkRsakey->nPrimes; i++) + { + peCrtElements[i] = SymCryptModElementCreate( pbFnScratch, cbFnScratch, pkRsakey->pmPrimes[i] ); + SYMCRYPT_ASSERT( peCrtElements[i] != NULL ); + pbFnScratch += cbModElements[i]; + cbFnScratch -= cbModElements[i]; + } + + piVerify = SymCryptIntCreate( pbFnScratch, cbFnScratch, pkRsakey->nDigitsOfModulus ); + SYMCRYPT_ASSERT( piVerify != NULL ); + pbFnScratch += cbInt; + cbFnScratch -= cbInt; + + peVerify = SymCryptModElementCreate( pbFnScratch, cbFnScratch, pkRsakey->pmModulus ); + SYMCRYPT_ASSERT( peVerify != NULL ); + pbFnScratch += cbModElementVerify; + cbFnScratch -= cbModElementVerify; + + // Set the ciphertext + scError = SymCryptIntSetValue( pbSrc, cbSrc, numFormat, piCiphertext ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Modular exponentiations + for (UINT32 
i=0; i<pkRsakey->nPrimes; i++) + { + // c mod the prime + // Note: For two equally sized primes we can use straight the faster SymCryptIntToModElement function + // but for now this is the general case. + SymCryptIntDivMod( + piCiphertext, + SymCryptDivisorFromModulus(pkRsakey->pmPrimes[i]), + NULL, + piTmp, + pbFnScratch, + cbFnScratch ); + + SymCryptIntToModElement( piTmp, pkRsakey->pmPrimes[i], peCrtElements[i], pbFnScratch, cbFnScratch ); + + // Modular Exponentiation + SymCryptModExp( + pkRsakey->pmPrimes[i], + peCrtElements[i], + pkRsakey->piCrtPrivExps[i], // For now only the first exponent is allowed + pkRsakey->nBitsOfPrimes[i], // This is a public value + 0, // Side-channel safe modexp + peCrtElements[i], + pbFnScratch, + cbFnScratch ); + } + + // Solve the crt equations + scError = SymCryptCrtSolve( + pkRsakey->nPrimes, + (PCSYMCRYPT_MODULUS *) pkRsakey->pmPrimes, + (PSYMCRYPT_MODELEMENT *) pkRsakey->peCrtInverses, + peCrtElements, + 0, + piPlaintext, + pbFnScratch, + cbFnScratch ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + /* + A hardware error during RSA decryption can leak the + prime factors. For example, suppose the message + is M and you try to sign it with + M^d for some decryption exponent d. + Using the CRT, you compute M^d mod p correctly but + M^d mod q incorrectly. Your supposed M^d (mod p*q) is + then raised to an encryption exponent e + by the verifier, detects an invalid signature. + The verifier can also find p via a GCD and factor the modulus. + + To avoid this problem, re-encrypt the supposed M^d + and verify our own signature. 
+ */ + + // Don't call the full encryption function just the modular exponentiation + + SymCryptIntToModElement( piPlaintext, pkRsakey->pmModulus, peVerify, pbFnScratch, cbFnScratch ); + + SymCryptIntSetValueUint64( pkRsakey->au64PubExp[0], piTmp ); + + // Modular Exponentiation (Not side-channel safe) + SymCryptModExp( + pkRsakey->pmModulus, + peVerify, + piTmp, + SymCryptIntBitsizeOfValue( piTmp ), + SYMCRYPT_FLAG_DATA_PUBLIC, // Exponent is public + peVerify, + pbFnScratch, + cbFnScratch ); + + SymCryptModElementToInt( pkRsakey->pmModulus, peVerify, piVerify, pbFnScratch, cbFnScratch ); + + if (!SymCryptIntIsEqual( piCiphertext, piVerify )) + { + scError = SYMCRYPT_HARDWARE_FAILURE; + goto cleanup; + } + + // Output the result + scError = SymCryptIntGetValue( piPlaintext, pbDst, cbDst, numFormat ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + +cleanup: + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaCoreDec( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PSYMCRYPT_MODELEMENT peRes = NULL; + UINT32 cbModElement = 0; + + PBYTE pbFnScratch = NULL; + SIZE_T cbFnScratch = 0; + + UNREFERENCED_PARAMETER( flags ); + + // Make sure that the key has a private key + if ((cbSrc>SymCryptRsakeySizeofModulus(pkRsakey)) || + (!pkRsakey->hasPrivateKey) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbModElement = SymCryptSizeofModElementFromModulus( pkRsakey->pmModulus ); + + UNREFERENCED_PARAMETER( cbScratch ); + SYMCRYPT_ASSERT( cbScratch >= cbModElement + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pkRsakey->nDigitsOfModulus ), + SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( pkRsakey->nDigitsOfModulus ) ) ); + + pbFnScratch = pbScratch; 
+ cbFnScratch = cbScratch; + + peRes = SymCryptModElementCreate( pbScratch, cbModElement, pkRsakey->pmModulus ); + SYMCRYPT_ASSERT( peRes != NULL ); + pbFnScratch += cbModElement; + cbFnScratch -= cbModElement; + + // Set the ciphertext + scError = SymCryptModElementSetValue( pbSrc, cbSrc, numFormat, pkRsakey->pmModulus, peRes, pbFnScratch, cbFnScratch ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Modular Exponentiation + SymCryptModExp( + pkRsakey->pmModulus, + peRes, + pkRsakey->piPrivExps[0], // For now only the first exponent is allowed + pkRsakey->nBitsOfModulus, // This is a public value + 0, // Side-channel safe modexp + peRes, + pbFnScratch, + cbFnScratch ); + + // Output the value + scError = SymCryptModElementGetValue( pkRsakey->pmModulus, peRes, pbDst, cbDst, numFormat, pbFnScratch, cbFnScratch ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + +cleanup: + + return scError; +} + + +// +// Encryption / decryption functions +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaRawEncrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + UINT32 cbScratch = 0; + + // Make sure that the key may be used in Encrypt/Decrypt + if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_ENCRYPT) == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbScratch = SymCryptRsaCoreEncScratchSpace( pkRsakey ); + + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + scError = SymCryptRsaCoreEnc( pkRsakey, pbSrc, cbSrc, numFormat, flags, pbDst, cbDst, pbScratch, cbScratch ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + if (pbScratch!=NULL) + { + 
SymCryptWipe(pbScratch,cbScratch);
+        SymCryptCallbackFree(pbScratch);
+    }
+
+    return scError;
+}
+
+//
+// SymCryptRsaRawDecrypt
+//
+// Raw (unpadded) RSA private-key operation on pbSrc, with the result written
+// to pbDst in the caller-selected number format.
+//
+// Returns SYMCRYPT_INVALID_ARGUMENT when the key is not flagged for
+// encryption use or carries no private key, and
+// SYMCRYPT_MEMORY_ALLOCATION_FAILURE when the scratch buffer cannot be
+// allocated; otherwise returns whatever the core decryption routine returns.
+// The scratch buffer is wiped before it is freed on every path that
+// allocated it.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptRsaRawDecrypt(
+    _In_                        PCSYMCRYPT_RSAKEY           pkRsakey,
+    _In_reads_bytes_( cbSrc )   PCBYTE                      pbSrc,
+                                SIZE_T                      cbSrc,
+                                SYMCRYPT_NUMBER_FORMAT      numFormat,
+                                UINT32                      flags,
+    _Out_writes_( cbDst )       PBYTE                       pbDst,
+                                SIZE_T                      cbDst )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    PBYTE pbScratch = NULL;
+    UINT32 cbScratch = 0;
+
+    // Make sure that the key may be used in Encrypt/Decrypt
+    if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_ENCRYPT) == 0 )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Make sure that the key has a private key
+    if (!pkRsakey->hasPrivateKey)
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+#define SYMCRYPT_CRT_DECRYPTION     (1)         // Set this to 0 to test the non-crt decryption
+    // NOTE: although written inside this function body, this is a preprocessor
+    // definition and stays in effect for the rest of the file; the later
+    // "#if (SYMCRYPT_CRT_DECRYPTION)" blocks in the padded decrypt/sign
+    // functions depend on it.
+
+    // Scratch space
+#if (SYMCRYPT_CRT_DECRYPTION)
+    cbScratch = SymCryptRsaCoreDecCrtScratchSpace( pkRsakey );
+#else
+    cbScratch = SymCryptRsaCoreDecScratchSpace( pkRsakey );
+#endif
+
+    pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch );
+    if (pbScratch == NULL)
+    {
+        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
+        goto cleanup;
+    }
+
+#if (SYMCRYPT_CRT_DECRYPTION)
+    scError = SymCryptRsaCoreDecCrt( pkRsakey, pbSrc, cbSrc, numFormat, flags, pbDst, cbDst, pbScratch, cbScratch );
+#else
+    scError = SymCryptRsaCoreDec( pkRsakey, pbSrc, cbSrc, numFormat, flags, pbDst, cbDst, pbScratch, cbScratch );
+#endif
+
+    if (scError != SYMCRYPT_NO_ERROR)
+    {
+        goto cleanup;
+    }
+
+    scError = SYMCRYPT_NO_ERROR;
+
+cleanup:
+    // The scratch area held intermediate private-key computation state, so
+    // it is wiped before being returned to the allocator.
+    if (pbScratch!=NULL)
+    {
+        SymCryptWipe(pbScratch,cbScratch);
+        SymCryptCallbackFree(pbScratch);
+    }
+
+    return scError;
+}
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptRsaPkcs1Encrypt(
+    _In_                        PCSYMCRYPT_RSAKEY           pkRsakey,
+    _In_reads_bytes_( cbSrc )   PCBYTE                      pbSrc,
+                                SIZE_T                      cbSrc,
+                                UINT32                      flags,
+                                SYMCRYPT_NUMBER_FORMAT      nfDst,
+    _Out_writes_opt_( cbDst )   PBYTE                       pbDst,
+                                SIZE_T                      cbDst,
+    _Out_
SIZE_T *pcbDst )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    PBYTE pbScratch = NULL;
+    SIZE_T cbScratch = 0;
+
+    PBYTE pbTmp = NULL;
+    SIZE_T cbTmp = SymCryptRsakeySizeofModulus(pkRsakey);
+
+    cbScratch = cbTmp + SymCryptRsaCoreEncScratchSpace( pkRsakey );
+
+    UNREFERENCED_PARAMETER( flags );
+
+    // Make sure that the key may be used in Encrypt/Decrypt
+    if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_ENCRYPT) == 0 )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    *pcbDst = cbTmp;
+
+    // Check if only *pcbDst is needed
+    if (pbDst == NULL)
+    {
+        scError = SYMCRYPT_NO_ERROR;
+        goto cleanup;
+    }
+
+    pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch );
+    if (pbScratch == NULL)
+    {
+        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
+        goto cleanup;
+    }
+
+    pbTmp = pbScratch + cbScratch - cbTmp;
+
+    scError = SymCryptRsaPkcs1ApplyEncryptionPadding(
+                    pbSrc,
+                    cbSrc,
+                    pbTmp,
+                    cbTmp );
+    if (scError != SYMCRYPT_NO_ERROR)
+    {
+        goto cleanup;
+    }
+
+    scError = SymCryptRsaCoreEnc(
+                    pkRsakey,
+                    pbTmp,
+                    cbTmp,
+                    SYMCRYPT_NUMBER_FORMAT_MSB_FIRST,       // Always MSB first for RSA PKCS1
+                    flags,
+                    pbDst,
+                    cbDst,
+                    pbScratch,
+                    cbScratch - cbTmp );
+    if (scError != SYMCRYPT_NO_ERROR)
+    {
+        goto cleanup;
+    }
+
+    if (nfDst == SYMCRYPT_NUMBER_FORMAT_LSB_FIRST)
+    {
+        // To implement this revert the buffer properly
+        scError = SYMCRYPT_NOT_IMPLEMENTED;
+        goto cleanup;
+    }
+
+    scError = SYMCRYPT_NO_ERROR;
+
+cleanup:
+    if (pbScratch!=NULL)
+    {
+        SymCryptWipe(pbScratch,cbScratch);
+        SymCryptCallbackFree(pbScratch);
+    }
+
+    return scError;
+}
+
+// Ensure SymCryptRoundUpPow2Sizet below will not fail
+C_ASSERT((UINT32) ((SYMCRYPT_RSAKEY_MAX_BITSIZE_MODULUS + 7) / 8) <= ((SIZE_T_MAX / 2) + 1));
+
+//
+// SymCryptRsaPkcs1Decrypt
+//
+// RSA decryption followed by removal of PKCS1 encryption padding.
+//
+// The key must be flagged for encryption use and must carry a private key,
+// otherwise SYMCRYPT_INVALID_ARGUMENT is returned. Only MSB-first input is
+// supported; SYMCRYPT_NUMBER_FORMAT_LSB_FIRST yields SYMCRYPT_NOT_IMPLEMENTED.
+// The core decryption writes cbModulus bytes into a temporary buffer carved
+// from the end of the scratch allocation, and the padding-removal routine
+// then produces pbDst / *pcbDst from that buffer.
+//
+// NOTE(review): *pcbDst is not assigned by this function on the early-exit
+// paths; it is only handed to the padding-removal call.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptRsaPkcs1Decrypt(
+    _In_                        PCSYMCRYPT_RSAKEY           pkRsakey,
+    _In_reads_bytes_( cbSrc )   PCBYTE                      pbSrc,
+                                SIZE_T                      cbSrc,
+                                SYMCRYPT_NUMBER_FORMAT      nfSrc,
+                                UINT32                      flags,
+    _Out_writes_opt_( cbDst )   PBYTE                       pbDst,
+                                SIZE_T                      cbDst,
+    _Out_                       SIZE_T                      *pcbDst )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    PBYTE pbScratch = NULL;
+    SIZE_T cbScratch = 0;
+
+    PBYTE pbTmp = NULL;
+    SIZE_T cbModulus = SymCryptRsakeySizeofModulus(pkRsakey);
+    SIZE_T cbTmp = SymCryptRoundUpPow2Sizet( cbModulus );   // tmp buffer needs to be a power of 2
+
+    // Make sure that the key may be used in Encrypt/Decrypt
+    if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_ENCRYPT) == 0 )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Make sure that the key has a private key
+    if (!pkRsakey->hasPrivateKey)
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Scratch layout: [ core decryption scratch | cbTmp-byte temporary buffer ]
+#if (SYMCRYPT_CRT_DECRYPTION)
+    cbScratch = cbTmp + SymCryptRsaCoreDecCrtScratchSpace( pkRsakey );
+#else
+    cbScratch = cbTmp + SymCryptRsaCoreDecScratchSpace( pkRsakey );
+#endif
+
+    // NOTE(review): flags is still forwarded to the core decryption call below.
+    UNREFERENCED_PARAMETER( flags );
+
+    pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch );
+    if (pbScratch == NULL)
+    {
+        scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE;
+        goto cleanup;
+    }
+
+    // The temporary buffer occupies the last cbTmp bytes of the allocation
+    pbTmp = pbScratch + cbScratch - cbTmp;
+
+    if (nfSrc == SYMCRYPT_NUMBER_FORMAT_LSB_FIRST)
+    {
+        // To implement this revert the buffer properly
+        scError = SYMCRYPT_NOT_IMPLEMENTED;
+        goto cleanup;
+    }
+
+    // Decrypt into the temporary buffer; only cbModulus of its cbTmp bytes
+    // are passed as the destination size.
+#if (SYMCRYPT_CRT_DECRYPTION)
+    scError = SymCryptRsaCoreDecCrt(
+                    pkRsakey,
+                    pbSrc,
+                    cbSrc,
+                    SYMCRYPT_NUMBER_FORMAT_MSB_FIRST,
+                    flags,
+                    pbTmp,
+                    cbModulus,
+                    pbScratch,
+                    cbScratch - cbTmp );
+#else
+    scError = SymCryptRsaCoreDec(
+                    pkRsakey,
+                    pbSrc,
+                    cbSrc,
+                    SYMCRYPT_NUMBER_FORMAT_MSB_FIRST,
+                    flags,
+                    pbTmp,
+                    cbModulus,
+                    pbScratch,
+                    cbScratch - cbTmp );
+#endif
+    if (scError != SYMCRYPT_NO_ERROR)
+    {
+        goto cleanup;
+    }
+
+    scError = SymCryptRsaPkcs1RemoveEncryptionPadding(
+                    pbTmp,
+                    cbModulus,
+                    cbTmp,
+                    pbDst,
+                    cbDst,
+                    pcbDst );
+    // The error that is returned from the encryption padding is confidential data
+    // due to Bleichenbacher-style attacks.
+    // Make sure we don't create a side-channel leak for it.
+    // (Hence no branch on scError here: control falls straight into cleanup.)
+
+cleanup:
+    if (pbScratch!=NULL)
+    {
+        SymCryptWipe(pbScratch,cbScratch);
+        SymCryptCallbackFree(pbScratch);
+    }
+
+    return scError;
+}
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptRsaOaepEncrypt(
+    _In_                        PCSYMCRYPT_RSAKEY           pkRsakey,
+    _In_reads_bytes_( cbSrc )   PCBYTE                      pbSrc,
+                                SIZE_T                      cbSrc,
+    _In_                        PCSYMCRYPT_HASH             hashAlgorithm,
+    _In_reads_bytes_( cbLabel ) PCBYTE                      pbLabel,
+                                SIZE_T                      cbLabel,
+                                UINT32                      flags,
+                                SYMCRYPT_NUMBER_FORMAT      nfDst,
+    _Out_writes_opt_( cbDst )   PBYTE                       pbDst,
+                                SIZE_T                      cbDst,
+    _Out_                       SIZE_T                      *pcbDst )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    PBYTE pbScratch = NULL;
+    SIZE_T cbScratch = 0;
+
+    PBYTE pbTmp = NULL;
+    SIZE_T cbTmp = SymCryptRsakeySizeofModulus(pkRsakey);
+
+    UNREFERENCED_PARAMETER( flags );
+
+    // Make sure that the key may be used in Encrypt/Decrypt
+    if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_ENCRYPT) == 0 )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    *pcbDst = cbTmp;
+
+    // Check if only *pcbDst is needed
+    if (pbDst == NULL)
+    {
+        scError = SYMCRYPT_NO_ERROR;
+        goto cleanup;
+    }
+
+    // The SYMCRYPT_SCRATCH_BYTES_FOR_RSA_OAEP macro does not
+    // overflow cbScratch since cbTmp < 2^17.
+ cbScratch = cbTmp + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_RSA_OAEP( hashAlgorithm, cbTmp ), SymCryptRsaCoreEncScratchSpace( pkRsakey ) ); + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pbTmp = pbScratch + cbScratch - cbTmp; + + scError = SymCryptRsaOaepApplyEncryptionPadding( + pbSrc, + cbSrc, + hashAlgorithm, + pbLabel, + cbLabel, + NULL, // Seed + 0, // cbSeed + pbTmp, + cbTmp, + pbScratch, + cbScratch - cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptRsaCoreEnc( + pkRsakey, + pbTmp, + cbTmp, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, // Always MSB first for RSA OAEP + flags, + pbDst, + cbDst, + pbScratch, + cbScratch - cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + if (nfDst == SYMCRYPT_NUMBER_FORMAT_LSB_FIRST) + { + // To implement this revert the buffer properly + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaOaepDecrypt( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT nfSrc, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_reads_bytes_( cbLabel ) PCBYTE pbLabel, + SIZE_T cbLabel, + UINT32 flags, + _Out_writes_opt_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + _Out_ SIZE_T *pcbDst ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SIZE_T cbDstResult = 0; // We always return a value into *pcbDst + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + PBYTE pbTmp = NULL; + SIZE_T cbTmp = SymCryptRsakeySizeofModulus(pkRsakey); + + UNREFERENCED_PARAMETER( flags ); + + // Make sure that the key may be used in Encrypt/Decrypt + if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_ENCRYPT) == 0 ) + { + scError 
= SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (cbSrc > cbTmp) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Make sure that the key has a private key + if (!pkRsakey->hasPrivateKey) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // The SYMCRYPT_SCRATCH_BYTES_FOR_RSA_OAEP macro does not + // overflow cbScratch since cbTmp < 2^17. +#if (SYMCRYPT_CRT_DECRYPTION) + cbScratch = cbTmp + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_RSA_OAEP( hashAlgorithm, cbSrc ), SymCryptRsaCoreDecCrtScratchSpace( pkRsakey ) ); +#else + cbScratch = cbTmp + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_RSA_OAEP( hashAlgorithm, cbSrc ), SymCryptRsaCoreDecScratchSpace( pkRsakey ) ); +#endif + + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pbTmp = pbScratch + cbScratch - cbTmp; + + if (nfSrc == SYMCRYPT_NUMBER_FORMAT_LSB_FIRST) + { + // To implement this revert the buffer properly + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + +#if (SYMCRYPT_CRT_DECRYPTION) + scError = SymCryptRsaCoreDecCrt( + pkRsakey, + pbSrc, + cbSrc, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + flags, + pbTmp, + cbTmp, + pbScratch, + cbScratch - cbTmp ); +#else + scError = SymCryptRsaCoreDec( + pkRsakey, + pbSrc, + cbSrc, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + flags, + pbTmp, + cbTmp, + pbScratch, + cbScratch - cbTmp ); +#endif + + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptRsaOaepRemoveEncryptionPadding( + pbTmp, + cbTmp, + hashAlgorithm, + pbLabel, + cbLabel, + flags, + pbDst, + cbDst, + &cbDstResult, + pbScratch, + cbScratch - cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + *pcbDst = cbDstResult; + + return scError; +} + +// +// Signing / Verification 
functions +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1Sign( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_ PCSYMCRYPT_OID pHashOIDs, + _In_ SIZE_T nOIDCount, + UINT32 flags, + SYMCRYPT_NUMBER_FORMAT nfSignature, + _Out_writes_opt_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature, + _Out_ SIZE_T *pcbSignature ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + PBYTE pbTmp = NULL; + SIZE_T cbTmp = SymCryptRsakeySizeofModulus(pkRsakey); + + PCBYTE pbOID = NULL; + SIZE_T cbOID = 0; + + UNREFERENCED_PARAMETER(nOIDCount); + + pbOID = pHashOIDs ? pHashOIDs->pbOID : NULL; + cbOID = pHashOIDs ? pHashOIDs->cbOID : 0; + + // Make sure that the key may be used in Sign/Verify + if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_SIGN) == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Make sure that the key has a private key + if (!pkRsakey->hasPrivateKey) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + *pcbSignature = cbTmp; + + // Check if only *pcbSignature is needed + if (pbSignature == NULL) + { + scError = SYMCRYPT_NO_ERROR; + goto cleanup; + } + +#if (SYMCRYPT_CRT_DECRYPTION) + cbScratch = cbTmp + SymCryptRsaCoreDecCrtScratchSpace( pkRsakey ); +#else + cbScratch = cbTmp + SymCryptRsaCoreDecScratchSpace( pkRsakey ); +#endif + + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pbTmp = pbScratch + cbScratch - cbTmp; + + scError = SymCryptRsaPkcs1ApplySignaturePadding( + pbHashValue, + cbHashValue, + pbOID, + cbOID, + flags, + pbTmp, + cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + +#if (SYMCRYPT_CRT_DECRYPTION) + scError = SymCryptRsaCoreDecCrt( + pkRsakey, + pbTmp, + cbTmp, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + flags, + pbSignature, + cbSignature, + 
pbScratch, + cbScratch - cbTmp ); +#else + scError = SymCryptRsaCoreDec( + pkRsakey, + pbTmp, + cbTmp, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + flags, + pbSignature, + cbSignature, + pbScratch, + cbScratch - cbTmp ); +#endif + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + if (nfSignature == SYMCRYPT_NUMBER_FORMAT_LSB_FIRST) + { + // To implement this revert the buffer properly + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1Verify( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + SYMCRYPT_NUMBER_FORMAT nfSignature, + _In_reads_opt_( nOIDCount ) PCSYMCRYPT_OID pHashOIDs, + _In_ SIZE_T nOIDCount, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + PBYTE pbTmp = NULL; + SIZE_T cbTmp = SymCryptRsakeySizeofModulus(pkRsakey); + + // Make sure that the key may be used in Sign/Verify + if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_SIGN) == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (cbSignature > cbTmp) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (nfSignature == SYMCRYPT_NUMBER_FORMAT_LSB_FIRST) + { + // To implement this revert the buffer properly + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + + // The SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PKCS1 macro does not + // overflow cbScratch since cbTmp < 2^17. 
+ cbScratch = cbTmp + + SYMCRYPT_MAX( SymCryptRsaCoreEncScratchSpace( pkRsakey ), + SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PKCS1( cbTmp ) ); + + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pbTmp = pbScratch + cbScratch - cbTmp; + + scError = SymCryptRsaCoreEnc( + pkRsakey, + pbSignature, + cbSignature, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + flags, + pbTmp, + cbTmp, + pbScratch, + cbScratch - cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptRsaPkcs1VerifySignaturePadding( + pbHashValue, + cbHashValue, + pHashOIDs, + nOIDCount, + pbTmp, + cbTmp, + flags, + pbScratch, + cbScratch - cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPssSign( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + SIZE_T cbSalt, + UINT32 flags, + SYMCRYPT_NUMBER_FORMAT nfSignature, + _Out_writes_opt_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature, + _Out_ SIZE_T *pcbSignature ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + PBYTE pbTmp = NULL; + SIZE_T cbTmp = SymCryptRsakeySizeofModulus(pkRsakey); + + // Make sure that the key may be used in Sign/Verify + if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_SIGN) == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if ( (cbHashValue > cbTmp) || + (cbSalt > cbTmp) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Make sure that the key has a private key + if (!pkRsakey->hasPrivateKey) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + *pcbSignature 
= cbTmp; + + // Check if only *pcbSignature is needed + if (pbSignature == NULL) + { + scError = SYMCRYPT_NO_ERROR; + goto cleanup; + } + + // The SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PSS macro does not + // overflow cbScratch since cbTmp < 2^17. +#if (SYMCRYPT_CRT_DECRYPTION) + cbScratch = cbTmp + + SYMCRYPT_MAX( SymCryptRsaCoreDecCrtScratchSpace( pkRsakey ), + SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PSS( hashAlgorithm, cbHashValue, cbTmp ) ); +#else + cbScratch = cbTmp + + SYMCRYPT_MAX( SymCryptRsaCoreDecScratchSpace( pkRsakey ), + SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PSS( hashAlgorithm, cbHashValue, cbTmp ) ); +#endif + + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pbTmp = pbScratch + cbScratch - cbTmp; + + scError = SymCryptRsaPssApplySignaturePadding( + pbHashValue, + cbHashValue, + hashAlgorithm, + NULL, // For now only random salt supported + cbSalt, + pkRsakey->nBitsOfModulus, + flags, + pbTmp, + cbTmp, + pbScratch, + cbScratch - cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + +#if (SYMCRYPT_CRT_DECRYPTION) + scError = SymCryptRsaCoreDecCrt( + pkRsakey, + pbTmp, + cbTmp, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + flags, + pbSignature, + cbSignature, + pbScratch, + cbScratch - cbTmp ); +#else + scError = SymCryptRsaCoreDec( + pkRsakey, + pbTmp, + cbTmp, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + flags, + pbSignature, + cbSignature, + pbScratch, + cbScratch - cbTmp ); +#endif + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + if (nfSignature == SYMCRYPT_NUMBER_FORMAT_LSB_FIRST) + { + // To implement this revert the buffer properly + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPssVerify( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, 
+ _In_reads_bytes_( cbHashValue ) PCBYTE pbHashValue, + SIZE_T cbHashValue, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature, + SYMCRYPT_NUMBER_FORMAT nfSignature, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + SIZE_T cbSalt, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + PBYTE pbTmp = NULL; + SIZE_T cbTmp = SymCryptRsakeySizeofModulus(pkRsakey); + + // Make sure that the key may be used in Sign/Verify + if ( (pkRsakey->fAlgorithmInfo & SYMCRYPT_FLAG_RSAKEY_SIGN) == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if ( (cbHashValue > cbTmp) || + (cbSalt > cbTmp) || + (cbSignature > cbTmp) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (nfSignature == SYMCRYPT_NUMBER_FORMAT_LSB_FIRST) + { + // To implement this revert the buffer properly + scError = SYMCRYPT_NOT_IMPLEMENTED; + goto cleanup; + } + + // The SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PSS macro does not + // overflow cbScratch since cbTmp < 2^17. 
+ cbScratch = cbTmp + + SYMCRYPT_MAX( SymCryptRsaCoreEncScratchSpace( pkRsakey ), + SYMCRYPT_SCRATCH_BYTES_FOR_RSA_PSS( hashAlgorithm, cbHashValue, cbTmp ) ); + + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pbTmp = pbScratch + cbScratch - cbTmp; + + scError = SymCryptRsaCoreEnc( + pkRsakey, + pbSignature, + cbSignature, + SYMCRYPT_NUMBER_FORMAT_MSB_FIRST, + flags, + pbTmp, + cbTmp, + pbScratch, + cbScratch - cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptRsaPssVerifySignaturePadding( + pbHashValue, + cbHashValue, + hashAlgorithm, + cbSalt, + pbTmp, + cbTmp, + pkRsakey->nBitsOfModulus, + flags, + pbScratch, + cbScratch - cbTmp ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} diff --git a/libs/symcrypt/lib/rsa_padding.c b/libs/symcrypt/lib/rsa_padding.c new file mode 100644 index 00000000000..d2f384b61c3 --- /dev/null +++ b/libs/symcrypt/lib/rsa_padding.c @@ -0,0 +1,1218 @@ +// +// rsa_padding.c RSA padding algorithms +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +#define ASN1_SEQUENCE_BYTE (0x30) +#define ASN1_OCTET_STRING_BYTE (0x04) + +#define PKCS_BLOCKTYPE_1 (0x01) // This is not used, added here for completeness +#define PKCS_BLOCKTYPE_2 (0x02) + +// +// Note: we could optimize these OID lists by using the same byte sequence for +// the long and short versions. 
+// +const SYMCRYPT_OID SymCryptMd5OidList[] = +{ + {12, (BYTE *)"\x06\x08\x2a\x86\x48\x86\xf7\x0d\x02\x05\x05\x00"}, + {10, (BYTE *)"\x06\x08\x2a\x86\x48\x86\xf7\x0d\x02\x05"}, +}; + +const SYMCRYPT_OID SymCryptSha1OidList[] = +{ + {9, (BYTE *)"\x06\x05\x2b\x0e\x03\x02\x1a\x05\x00"}, + {7, (BYTE *)"\x06\x05\x2b\x0e\x03\x02\x1a"} +}; + +const SYMCRYPT_OID SymCryptSha224OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x04\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x04"} +}; + +const SYMCRYPT_OID SymCryptSha256OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01"} +}; + +const SYMCRYPT_OID SymCryptSha384OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x02\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x02"} +}; + +const SYMCRYPT_OID SymCryptSha512OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x03\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x03"} +}; + +const SYMCRYPT_OID SymCryptSha512_224OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x05\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x05"} +}; + +const SYMCRYPT_OID SymCryptSha512_256OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x06\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x06"} +}; + +const SYMCRYPT_OID SymCryptSha3_224OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x07\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x07"} +}; + +const SYMCRYPT_OID SymCryptSha3_256OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x08\x05\x00"}, + {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x08"} +}; + +const SYMCRYPT_OID SymCryptSha3_384OidList[] = +{ + {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x09\x05\x00"}, + {11, 
(BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x09"}
+};
+
+const SYMCRYPT_OID SymCryptSha3_512OidList[] =
+{
+    {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x0a\x05\x00"},
+    {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x0a"}
+};
+
+const SYMCRYPT_OID SymCryptShake128OidList[] =
+{
+    {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x0b\x05\x00"},
+    {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x0b"}
+};
+
+const SYMCRYPT_OID SymCryptShake256OidList[] =
+{
+    {13, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x0c\x05\x00"},
+    {11, (BYTE *)"\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x0c"}
+};
+
+//
+// Mask generation function shared by the OAEP and PSS paddings below.
+//
+// Fills pbDst with cbDst bytes built as
+//      Hash( pbSrc || C(0) ) || Hash( pbSrc || C(1) ) || ...
+// where C(i) is the 4-byte MSB-first encoding of the iteration counter and the
+// last digest is truncated to the number of bytes still missing. This is the
+// MGF1 construction of RFC 8017, appendix B.2.1.
+//
+//  hashAlgorithm   hash used for every iteration
+//  pHashState      caller-provided buffer used as the hash state; it is
+//                  re-initialized on every iteration
+//  pbSrc, cbSrc    seed that is hashed in front of the counter
+//  pbDst, cbDst    output mask; nothing is written when cbDst is 0
+//
+VOID
+SYMCRYPT_CALL
+SymCryptRsaPaddingMaskGeneration(
+    _In_                        PCSYMCRYPT_HASH hashAlgorithm,
+    _In_                        PVOID           pHashState,
+    _In_reads_bytes_( cbSrc )   PCBYTE          pbSrc,
+                                SIZE_T          cbSrc,
+    _Out_writes_bytes_( cbDst ) PBYTE           pbDst,
+                                SIZE_T          cbDst )
+{
+    BYTE    rgbHash[SYMCRYPT_HASH_MAX_RESULT_SIZE] = { 0 };
+    BYTE    rgbCount[sizeof(UINT32)] = { 0 };
+    SIZE_T  cbMaskRemaining = cbDst;
+    PBYTE   pbMaskIndex = pbDst;
+
+    SIZE_T  cbHashAlg = SymCryptHashResultSize( hashAlgorithm );
+
+    // Number of digests needed to cover cbDst bytes (rounded up)
+    SIZE_T  cIterations = (cbDst + (cbHashAlg - 1)) / cbHashAlg;
+
+    for (UINT32 i = 0; i < cIterations; i++)
+    {
+        SymCryptHashInit( hashAlgorithm, pHashState );
+
+        // hash the seed
+        SymCryptHashAppend( hashAlgorithm, pHashState, pbSrc, cbSrc );
+
+        // Encode the counter MSB-first with shifts, so that the result does not
+        // depend on the byte order of the host (reading the bytes of i through
+        // a BYTE pointer is only correct on little-endian machines).
+        rgbCount[0] = (BYTE)(i >> 24);
+        rgbCount[1] = (BYTE)(i >> 16);
+        rgbCount[2] = (BYTE)(i >> 8);
+        rgbCount[3] = (BYTE)(i);
+
+        // hash the count
+        SymCryptHashAppend( hashAlgorithm, pHashState, rgbCount, sizeof(UINT32) );
+
+        // copy the bytes from this hash into the mask buffer
+        if (cbMaskRemaining >= cbHashAlg)
+        {
+            // A whole digest still fits: write it straight into the output
+            SymCryptHashResult( hashAlgorithm, pHashState, pbMaskIndex, cbHashAlg );
+
+            cbMaskRemaining -= cbHashAlg;
+            pbMaskIndex += cbHashAlg;
+        }
+        else
+        {
+            // Last, partial digest: go through the local buffer and truncate
+            SymCryptHashResult( hashAlgorithm, pHashState, rgbHash, cbHashAlg );
+
+            memcpy( pbMaskIndex, rgbHash, cbMaskRemaining);
+            break;
+        }
+    }
+
+    // The mask can be derived from secret data (e.g. the OAEP seed on decryption),
+    // so do not leave the unused tail of the last digest on the stack.
+    SymCryptWipe( rgbHash, sizeof(rgbHash) );
+}
+
+//
+// PKCS1 Encryption Format:
+//      0x00 || 0x02 || PS || 0x00 || M
+//
+// Writes the PKCS1 v1.5 encryption padding of pbPlaintext into pbPkcs1Format,
+// filling all cbPkcs1Format bytes.
+//
+// Returns SYMCRYPT_INVALID_ARGUMENT when the message does not fit
+// (cbPlaintext > cbPkcs1Format - 11), or the error reported by
+// SymCryptCallbackRandom when generating the padding bytes fails.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptRsaPkcs1ApplyEncryptionPadding(
+    _In_reads_bytes_( cbPlaintext )         PCBYTE  pbPlaintext,
+                                            SIZE_T  cbPlaintext,
+    _Out_writes_bytes_( cbPkcs1Format )     PBYTE   pbPkcs1Format,
+                                            SIZE_T  cbPkcs1Format )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    // Format: 00 02 <PS> 00 <M>
+    // <PS> 8 or more padding bytes, random, all nonzero
+    // <M> message, length between 0 and cbPKCS1Format - 11.
+    // See RFC 3447 for more details.
+
+    SIZE_T cbPS;
+    SIZE_T i;
+
+    // ensure output buffer is big enough (padding has 11 bytes overhead).
+    // The comparison is written as a subtraction on the buffer size so that a
+    // huge cbPlaintext cannot wrap the sum around and slip past the check.
+    if( (cbPkcs1Format < 11) || (cbPlaintext > cbPkcs1Format - 11) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    // Everything that is not 00, 02, 00 or message is padding; at least 8 bytes
+    cbPS = cbPkcs1Format - (cbPlaintext + 3);
+
+    pbPkcs1Format[0] = 0x00;
+    pbPkcs1Format[1] = PKCS_BLOCKTYPE_2;
+
+    scError = SymCryptCallbackRandom( &pbPkcs1Format[2], cbPS );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    // Make sure that none of the bytes in PS is zero (as per specs)
+    for( i = 0; i < cbPS; i++ )
+    {
+        // Redraw a single byte until it is nonzero
+        while( pbPkcs1Format[2 + i] == 0x00 )
+        {
+            scError = SymCryptCallbackRandom( &pbPkcs1Format[2+i], 1 );
+            if( scError != SYMCRYPT_NO_ERROR )
+            {
+                goto cleanup;
+            }
+        }
+    }
+
+    // Separator between the padding string and the message
+    pbPkcs1Format[2 + cbPS] = 0x00;
+
+    memcpy(pbPkcs1Format + 3 + cbPS, pbPlaintext, cbPlaintext);
+
+cleanup:
+    return scError;
+}
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptRsaPkcs1RemoveEncryptionPadding(
+    _Inout_updates_bytes_( cbPkcs1Buffer )  PBYTE   pbPkcs1Format,
+                                            SIZE_T  cbPkcs1Format,
+                                            SIZE_T  cbPkcs1Buffer,
+    _Out_writes_bytes_opt_( cbPlaintext )   PBYTE   pbPlaintext,
+                                            SIZE_T  cbPlaintext,
+    _Out_                                   SIZE_T  *pcbPlaintext )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+    UINT32 mPaddingError = 0;
+ UINT32 mBufferSizeError = 0; + + UINT32 cbPlaintextResult = 0; + UINT32 i; + UINT32 mByteIsZero; + UINT32 mLengthFound; + UINT32 iFirstZero; + UINT32 cbPlaintextTruncated; + + SYMCRYPT_ASSERT( cbPkcs1Buffer >= cbPkcs1Format ); + SYMCRYPT_ASSERT( cbPkcs1Buffer >= 32 ); // Requirements for SymcryptScsRotateBuffer + SYMCRYPT_ASSERT( (cbPkcs1Buffer & (cbPkcs1Buffer - 1)) == 0 ); // must be a power of 2 + SYMCRYPT_ASSERT( cbPkcs1Buffer <= (1 << 30 )); // Ensure we can use 31-bit masking operations + + // Format: 00 02 <PS> 00 <M> + // <PS> 8 or more padding bytes, random, all nonzero + // <M> message, length between 0 and cbPKCS1Format - 11. + // See RFC 3447 for more details. + // We do not reveal the buffer contents through side-channels to avoid Bleichenbacher-style attacks + // This includes the plaintext length, which is determined by the location of the 00 byte + + if ( cbPkcs1Format < 11 ) + { + // cbPKCS1Format is public, so the if() is safe. 11 is the total overhead + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + // this also implies that cbPkcs1Buffer >= 16 + + // Check the leading bytes + mPaddingError |= SymCryptMask32IsNonzeroU31( pbPkcs1Format[0] ); // First byte must be = 0 + mPaddingError |= SymCryptMask32NeqU31( pbPkcs1Format[1], PKCS_BLOCKTYPE_2 ); // Second byte must be = 2 + + iFirstZero = 0; + mLengthFound = 0; + for (i = 2; i < cbPkcs1Format; i++) + { + mByteIsZero = SymCryptMask32IsZeroU31( pbPkcs1Format[i] ); + + // remember the index of the first zero byte + iFirstZero |= i & mByteIsZero & ~mLengthFound; + mLengthFound |= mByteIsZero; + } + mPaddingError |= ~mLengthFound; + + // At this point: + // - iFirstZero points to the first zero byte, or is 0 if there is no zero byte + // - mPaddingError is set if no zero byte was found + + // It is an error if the first zero is at index < 10 as <PS> needs to be at least 8 bytes + mPaddingError |= SymCryptMask32LtU31( iFirstZero, 10 ); + + // Compute the # bytes of the message; 0 if there 
was a padding error + cbPlaintextResult = ~mPaddingError & ((UINT32)(cbPkcs1Format - iFirstZero - 1)); + + // We're done if the caller didn't want the actual message, but only the size. + // We do that before checking the size of the plaintext buffer so that callers who + // only want the size do not get an error. + if( pbPlaintext == NULL ) + { + // Condition is public. + goto cleanup; + } + + // Checking that the output buffer is large enough is a bit tricky as we have a SIZE_T as + // buffer size, but we like to work on 31-bit integers as they have better mask algorithm perf. + // We can truncate the SIZE_T and check for equality, which is side-channel safe. + cbPlaintextTruncated = ((UINT32) cbPlaintext) & 0x7fffffff; // Truncate to 31 bits + if( cbPlaintextTruncated == cbPlaintext ) + { + // Condition is public as we write the whole plaintext buffer anyway. + mBufferSizeError = SymCryptMask32LtU31( cbPlaintextTruncated, cbPlaintextResult ); + } + + // The message starts at iFirstZero + 1, which is a variable location so we can't just memcpy it without + // revealing information through side channels. + // Instead we rotate the buffer left (side-channel safe) so that the message appears at the front. + // Rotation constant is such that the message appears at the start. + SymCryptScsRotateBuffer( pbPkcs1Format, cbPkcs1Buffer, (iFirstZero + 1) & (cbPkcs1Buffer - 1) ); + + // The ScsCopy function can copy the data to the destination buffer, but the input buffer must be + // as long as the output buffer. We can't just use cbPlaintext as the output buffer size, as it is + // unbounded. But we can limit it to cbPkcs1Format as that is the public key size and is public. + SymCryptScsCopy( pbPkcs1Format, cbPlaintextResult, pbPlaintext, SYMCRYPT_MIN( cbPlaintext, cbPkcs1Format ) ); + +cleanup: + // Update scError with the two error masks. Padding error given highest priority. 
+    scError ^= mBufferSizeError & (scError ^ SYMCRYPT_BUFFER_TOO_SMALL);
+    scError ^= mPaddingError & (scError ^ SYMCRYPT_INVALID_ARGUMENT);
+
+    *pcbPlaintext = cbPlaintextResult;
+    return scError;
+}
+
+//
+// OAEP Encryption Format:
+//                    +----------+---------+-------+
+//               DB = |  lHash   |    PS   |   M   |
+//                    +----------+---------+-------+
+//                                   |
+//         +----------+              V
+//         |   seed   |--> MGF ---> xor
+//         +----------+              |
+//               |                   |
+//      +--+     V                   |
+//      |00|    xor <----- MGF <-----|
+//      +--+     |                   |
+//        |      |                   |
+//        V      V                   V
+//      +--+----------+----------------------------+
+// EM = |00|maskedSeed|          maskedDB          |
+//      +--+----------+----------------------------+
+//
+// PS = zero or more bytes 0x00 || 0x01
+//
+// Writes the OAEP encoding of pbPlaintext into pbOaepFormat (all cbOaepFormat
+// bytes are written).
+//
+//  pbLabel, cbLabel    label whose hash becomes lHash
+//  pbSeed, cbSeed      optional caller-provided seed. When pbSeed is NULL, cbSeed
+//                      must be 0 and a random seed is generated. Otherwise cbSeed
+//                      must not exceed the hash size; a shorter seed is padded
+//                      with trailing zero bytes.
+//  pbScratch           working memory; must hold the hash state, two hash values
+//                      and two DB-sized buffers (see the assert below)
+//
+// Returns SYMCRYPT_INVALID_ARGUMENT when the plaintext does not fit or the seed
+// arguments are inconsistent, or the error from SymCryptCallbackRandom.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptRsaOaepApplyEncryptionPadding(
+    _In_reads_bytes_( cbPlaintext )     PCBYTE          pbPlaintext,
+                                        SIZE_T          cbPlaintext,
+    _In_                                PCSYMCRYPT_HASH hashAlgorithm,
+    _In_reads_bytes_( cbLabel )         PCBYTE          pbLabel,
+                                        SIZE_T          cbLabel,
+    _In_reads_bytes_opt_( cbSeed )      PCBYTE          pbSeed,
+                                        SIZE_T          cbSeed,
+    _Out_writes_bytes_( cbOaepFormat )  PBYTE           pbOaepFormat,
+                                        SIZE_T          cbOaepFormat,
+    _Out_writes_bytes_( cbScratch )     PBYTE           pbScratch,
+                                        SIZE_T          cbScratch )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    PVOID pHashState;
+
+    PBYTE pbSeedInternal;
+    PBYTE pbSeedMask;
+    PBYTE pbDB;
+    PBYTE pbDBMask;
+
+    SIZE_T cbDB;
+    SIZE_T cbPS;
+
+    SIZE_T cbHash = SymCryptHashResultSize( hashAlgorithm );
+    SIZE_T cbHashState = SymCryptHashStateSize( hashAlgorithm );
+
+    // OAEP overhead is 2 + 2 * size of hash result
+    SIZE_T cbOverhead = (cbHash * 2) + 2;
+
+    UNREFERENCED_PARAMETER( cbScratch );
+
+    // The plaintext bound is checked by subtracting from the buffer size rather
+    // than by adding to cbPlaintext, so a huge cbPlaintext cannot wrap the sum
+    // around and slip past the check.
+    if( (cbOaepFormat < cbOverhead) ||
+        (cbPlaintext > cbOaepFormat - cbOverhead) ||
+        ((pbSeed!=NULL) && (cbSeed>cbHash)) ||
+        ((pbSeed==NULL) && (cbSeed!=0)) )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    cbPS = cbOaepFormat - cbOverhead - cbPlaintext;
+    cbDB = cbOaepFormat - (cbHash + 1);
+
+    SYMCRYPT_ASSERT( cbScratch >= cbHashState + (cbHash * 2) + (cbDB * 2) );
+
+    // Carve the scratch space: hash state | seed | seed mask | DB | DB mask
+    pHashState = (PVOID) pbScratch;
+    pbSeedInternal = pbScratch + cbHashState;
+    pbSeedMask = pbSeedInternal + cbHash;
+    pbDB = pbSeedMask + cbHash;
+    pbDBMask = pbDB + cbDB;
+
+    // hash the label
+    SymCryptHash( hashAlgorithm, pbLabel, cbLabel, pbDB, cbHash );
+
+    // PS (all zero) followed by the 0x01 separator
+    SymCryptWipe(pbDB + cbHash, cbPS);
+    pbDB[cbHash + cbPS] = 0x01;
+
+    // The size check above guarantees cbHash + cbPS + 1 + cbPlaintext == cbDB,
+    // so this copy ends exactly at the end of DB.
+    memcpy(pbDB + cbHash + cbPS + 1, pbPlaintext, cbPlaintext);
+
+    if (NULL == pbSeed)
+    {
+        // generate the random seed (same length as the hash result)
+        scError = SymCryptCallbackRandom( pbSeedInternal, cbHash );
+        if (scError != SYMCRYPT_NO_ERROR)
+        {
+            goto cleanup;
+        }
+    }
+    else
+    {
+        // Caller-provided seed, zero-padded to the hash size
+        SymCryptWipe( pbSeedInternal, cbHash );
+        memcpy(pbSeedInternal, pbSeed, cbSeed);
+    }
+
+    // MGF(seed)
+    SymCryptRsaPaddingMaskGeneration(
+                hashAlgorithm,
+                pHashState,
+                pbSeedInternal,
+                cbHash,
+                pbDBMask,
+                cbDB);
+
+    // set the most significant byte to 0x00
+    pbOaepFormat[0] = 0x00;
+
+    // XOR the DB and the mask MGF(seed)
+    for (UINT32 i = 0; i < cbDB; i++)
+    {
+        pbOaepFormat[cbHash + 1 + i] = pbDB[i] ^ pbDBMask[i];
+    }
+
+    // MGF(masked DB)
+    SymCryptRsaPaddingMaskGeneration(
+                hashAlgorithm,
+                pHashState,
+                pbOaepFormat + cbHash + 1,
+                cbDB,
+                pbSeedMask,
+                cbHash);
+
+    // XOR the seed and the seed mask MGF(masked DB)
+    for (UINT32 i = 0; i < cbHash; i++)
+    {
+        pbOaepFormat[1 + i] = pbSeedInternal[i] ^ pbSeedMask[i];
+    }
+
+    scError = SYMCRYPT_NO_ERROR;
+
+cleanup:
+
+    return scError;
+}
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptRsaOaepRemoveEncryptionPadding(
+    _In_reads_bytes_( cbOAEPFormat )
+                                PCBYTE          pbOAEPFormat,
+                                SIZE_T          cbOAEPFormat,
+    _In_                        PCSYMCRYPT_HASH hashAlgorithm,
+    _In_reads_bytes_( cbLabel ) PCBYTE          pbLabel,
+                                SIZE_T          cbLabel,
+                                UINT32          flags,
+    _Out_writes_bytes_( cbPlaintext )
+                                PBYTE           pbPlaintext,
+                                SIZE_T          cbPlaintext,
+    _Out_                       SIZE_T          *pcbPlaintext,
+    _Out_writes_bytes_( cbScratch )
+                                PBYTE           pbScratch,
+                                SIZE_T          cbScratch )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    PVOID pHashState;
+
+    PBYTE pbSeedMask;
+    PBYTE
pbSeed; + PBYTE pbDBMask; + PBYTE pbDB; + PBYTE pbLabelHash; + UINT32 mPaddingError; + + SIZE_T cbDB; + + SIZE_T cnt = 0; + + SIZE_T cbHashAlg = SymCryptHashResultSize( hashAlgorithm ); + SIZE_T cbHashState = SymCryptHashStateSize( hashAlgorithm ); + + UNREFERENCED_PARAMETER( cbScratch ); + + if (flags != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // check if the most significant byte is set to 0x00 + mPaddingError = SymCryptMask32IsNonzeroU31( pbOAEPFormat[0] ); + + // Padding overhead is 2 hash values plus 2 bytes + if( cbOAEPFormat < (2*cbHashAlg + 2) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbDB = cbOAEPFormat - (cbHashAlg + 1); + + SYMCRYPT_ASSERT( cbScratch >= cbHashState + (cbHashAlg * 3) + (cbDB * 2) ); + + pHashState = (PVOID) pbScratch; + pbSeedMask = pbScratch + cbHashState; + pbSeed = pbSeedMask + cbHashAlg; + pbDBMask = pbSeed + cbHashAlg; + pbDB = pbDBMask + cbDB; + pbLabelHash = pbDB + cbDB; + + // MGF(masked DB) + SymCryptRsaPaddingMaskGeneration( + hashAlgorithm, + pHashState, + pbOAEPFormat + cbHashAlg + 1, + cbDB, + pbSeedMask, + cbHashAlg); + + // XOR the masked seed and the seed mask MGF(masked DB) + for (UINT32 i = 0; i < cbHashAlg; i++) + { + pbSeed[i] = pbOAEPFormat[1 + i] ^ pbSeedMask[i]; + } + + // MGF(seed) + SymCryptRsaPaddingMaskGeneration( + hashAlgorithm, + pHashState, + pbSeed, + cbHashAlg, + pbDBMask, + cbDB); + + // XOR the masked DB and the mask MGF(seed) + for (UINT32 i = 0; i < cbDB; i++) + { + pbDB[i] = pbOAEPFormat[cbHashAlg + 1 + i] ^ pbDBMask[i]; + } + + // hash the label + SymCryptHash( hashAlgorithm, pbLabel, cbLabel, pbLabelHash, cbHashAlg ); + + // check the label hash + mPaddingError |= SymCryptMask32IsZeroU31( SymCryptEqual( pbLabelHash, pbDB, cbHashAlg ) ); + + // + // At this point we have verified the leading 0 byte and the label hash, with any + // errors in mPaddingError. 
We could continue to make the entire padding removal + // side-channel safe like we do in the PKCS1 padding case, but that is not necessary. + // The side-channel only leaks data if the attacker can trigger two different behaviours + // and derive information from the difference. + // This is relatively easy to do with something like a match on 1 or 2 bytes because the + // chance of satisfying the check on a random input is still useful. But here we have + // matched 33 bytes (assuming a 32-byte hash) and the Bleichenbacher style attacks don't + // work beyond this point. Basically, these attacks produce ciphertexts without knowing + // the corresponding plaintext, and the chance of the label hash matching is something + // like 2^{-256}. So these ciphertexts will always fail right here, and there is no + // difference of behaviour that leaks data to the attacker. + // Thus, we can switch back to normal processing of the errors here. + // + + if( mPaddingError != 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // check the PS + for (cnt = cbHashAlg; cnt < cbDB; cnt++) + { + if (pbDB[cnt] == 0x01) + { + cnt++; + break; + } + else if (pbDB[cnt] != 0x00) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + + if (pbDB[cnt - 1] != 0x01) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // the rest is data + *pcbPlaintext = cbDB - cnt; + + if(NULL == pbPlaintext) + { + scError = SYMCRYPT_NO_ERROR; + goto cleanup; + } + + if (cbPlaintext < *pcbPlaintext) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + + memcpy(pbPlaintext, pbDB + cnt, *pcbPlaintext); + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + + return scError; +} + +// +// PKCS1 Signature Format: +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1ApplySignaturePadding( + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_reads_bytes_( cbHashOid ) + PCBYTE pbHashOid, + SIZE_T cbHashOid, + UINT32 flags, + _Out_writes_bytes_( 
cbPKCS1Format ) + PBYTE pbPKCS1Format, + SIZE_T cbPKCS1Format ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SIZE_T cbEncoding; + SIZE_T cbPadding; + SIZE_T cbOidOffset; + + BOOLEAN fInsertASN1 = TRUE; + + if ((flags & ~SYMCRYPT_FLAG_RSA_PKCS1_NO_ASN1) != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Simple check to avoid funky behavior if cbHash is close to SIZE_MAX + if (cbHash >= cbPKCS1Format) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + fInsertASN1 = ((flags & SYMCRYPT_FLAG_RSA_PKCS1_NO_ASN1) == 0); + + if (fInsertASN1) + { + if ( (pbHashOid!=NULL) && (cbHashOid>0) ) + { + // determine the length of the ASN1 Encoding + // 2 sequence bytes, 1 id byte and 3 length bytes + cbEncoding = 6 + cbHashOid + cbHash; + } + else + { + if (cbHashOid > 0) + { + // The caller has passed a NULL hash and a non 0 size for it. + // We can't guess the intent, hence we fail + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // special case for MD5 hash without OID + cbEncoding = 2 + cbHash; + } + + // we don't support encodings longer than 128 bytes, + // with this check we know that the length of the OID as + // well as the length of the hash value will each fit in + // one byte + if (cbEncoding > 0x80) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + else + { + cbEncoding = cbHash; + } + + // In a few scenarios (involving small RSA keys), the new large SHA + // hashes are too big to be signed by the specified key. + // There must be at least 8 bytes of 0xff. 
+ if (3 + 8 + cbEncoding > cbPKCS1Format) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbPadding = cbPKCS1Format - 3 - cbEncoding; + + + // insert the block type and delimiters + pbPKCS1Format[0] = 0x00; + pbPKCS1Format[1] = 0x01; + pbPKCS1Format[2 + cbPadding] = 0x00; + + // insert the type 1 padding + memset(pbPKCS1Format + 2, 0xff, cbPadding); + + if (fInsertASN1) + { + cbOidOffset = 1; + if ( (pbHashOid!=NULL) && (cbHashOid>0) ) + { + // insert the algorithm encoding + pbPKCS1Format[2 + cbPadding + 1] = ASN1_SEQUENCE_BYTE; + pbPKCS1Format[2 + cbPadding + 2] = (BYTE)cbEncoding - 2; + + // insert the sequence string byte, length of the hash and the hash value + pbPKCS1Format[2 + cbPadding + 3] = ASN1_SEQUENCE_BYTE; + pbPKCS1Format[2 + cbPadding + 4] = (BYTE)cbHashOid; + cbOidOffset += 4; + memcpy(pbPKCS1Format + 2 + cbPadding + cbOidOffset, pbHashOid, cbHashOid); + } + + // insert the octet string byte, length of the hash and the hash value + pbPKCS1Format[2 + cbPadding + cbOidOffset + cbHashOid] = ASN1_OCTET_STRING_BYTE; + pbPKCS1Format[2 + cbPadding + cbOidOffset + cbHashOid + 1] = (BYTE)cbHash; + memcpy(pbPKCS1Format + 2 + cbPadding + cbOidOffset + cbHashOid + 2, pbHash, cbHash); + } + else + { + memcpy(pbPKCS1Format + 3 + cbPadding, pbHash, cbHash); + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + + return scError; +} + +// +// Check if a PKCS1 padding is valid with regard to a hash oid +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPkcs1CheckSignaturePadding( + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_reads_bytes_( cbHashOid ) + PCBYTE pbHashOid, + SIZE_T cbHashOid, + _In_reads_bytes_( cbPKCS1Format ) + PCBYTE pbPKCS1Format, + UINT32 flags, + _Out_writes_bytes_( cbPKCS1Format ) + PBYTE pbScratch, + SIZE_T cbPKCS1Format) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SymCryptWipe(pbScratch, cbPKCS1Format); + + scError = SymCryptRsaPkcs1ApplySignaturePadding( + pbHash, + cbHash, + pbHashOid, + cbHashOid, + 
flags,
+                    pbScratch,
+                    cbPKCS1Format );
+    if (scError != SYMCRYPT_NO_ERROR)
+    {
+        goto cleanup;
+    }
+
+    if ( SymCryptEqual(pbScratch, pbPKCS1Format, cbPKCS1Format) )
+    {
+        scError = SYMCRYPT_NO_ERROR;
+    }
+    else
+    {
+        scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE;
+    }
+
+cleanup:
+
+    return scError;
+}
+
+
+//
+// Verify a PKCS1 v1.5 signature padding against a hash value.
+//
+// The expected encoding is rebuilt in pbScratch and compared with pbPKCS1Format:
+//  - once per entry of pHashOIDs (encoding with ASN.1 wrapper and that OID),
+//    stopping at the first match;
+//  - without ASN.1 wrapper when pHashOIDs is NULL, or when no OID matched and
+//    SYMCRYPT_FLAG_RSA_PKCS1_OPTIONAL_HASH_OID is set.
+//
+//  flags       only SYMCRYPT_FLAG_RSA_PKCS1_OPTIONAL_HASH_OID is accepted
+//  pbScratch   must provide at least cbPKCS1Format bytes
+//
+// Returns SYMCRYPT_NO_ERROR on a match, SYMCRYPT_INVALID_ARGUMENT for unknown
+// flags, and otherwise the error of the last comparison attempted. A non-NULL
+// pHashOIDs with nOIDCount == 0 never verifies through the OID path.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptRsaPkcs1VerifySignaturePadding(
+    _In_reads_bytes_( cbHash )  PCBYTE          pbHash,
+                                SIZE_T          cbHash,
+    _In_reads_( nOIDCount )     PCSYMCRYPT_OID  pHashOIDs,
+    _In_                        SIZE_T          nOIDCount,
+    _In_reads_bytes_( cbPKCS1Format )
+                                PCBYTE          pbPKCS1Format,
+                                SIZE_T          cbPKCS1Format,
+                                UINT32          flags,
+    _Out_writes_bytes_( cbScratch )
+                                PBYTE           pbScratch,
+                                SIZE_T          cbScratch )
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    UNREFERENCED_PARAMETER( cbScratch );
+    SYMCRYPT_ASSERT( cbScratch >= cbPKCS1Format );
+
+    if ((flags & ~SYMCRYPT_FLAG_RSA_PKCS1_OPTIONAL_HASH_OID) != 0)
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    //
+    // Verify padding and the hash value
+    //
+    if (pHashOIDs != NULL)
+    {
+        // Fail closed: if the list is empty the loop below never runs, and that
+        // must not be reported as a successful verification.
+        scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE;
+
+        // Index has the same type as nOIDCount so the comparison cannot truncate
+        for (SIZE_T i = 0; i < nOIDCount; i++)
+        {
+            scError = SymCryptRsaPkcs1CheckSignaturePadding(
+                            pbHash,
+                            cbHash,
+                            pHashOIDs[i].pbOID,
+                            pHashOIDs[i].cbOID,
+                            pbPKCS1Format,
+                            0,
+                            pbScratch,
+                            cbPKCS1Format );
+            if (scError == SYMCRYPT_NO_ERROR)
+            {
+                break;
+            }
+        }
+    }
+
+    if ((pHashOIDs == NULL ) ||
+        ((scError != SYMCRYPT_NO_ERROR) &&
+         ((flags & SYMCRYPT_FLAG_RSA_PKCS1_OPTIONAL_HASH_OID) != 0)))
+    {
+        // if no OID is passed in, or
+        // OID is passed in but failed verification, but OID is optional
+        scError = SymCryptRsaPkcs1CheckSignaturePadding(
+                        pbHash,
+                        cbHash,
+                        NULL,
+                        0,
+                        pbPKCS1Format,
+                        SYMCRYPT_FLAG_RSA_PKCS1_NO_ASN1,
+                        pbScratch,
+                        cbPKCS1Format );
+    }
+
+cleanup:
+
+    return scError;
+}
+
+//
+// PSS Signature Format:
+//                         +--------+----------+----------+
+//                    M' = |Padding1|  Hash M  |   salt   |
+//                         +--------+----------+----------+
+//                                       |
+//              +--------+----------+    V
+//        DB =  |Padding2|   salt   |   Hash
+//              +--------+----------+    |
+//                       |               |
+//
V | +--+ +// xor <--- MGF <---| |bc| +// | | +--+ +// | | | +// V V V +// +-------------------+----------+--+ +// EM = | maskedDB | H |bc| +// +-------------------+----------+--+ +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPssApplySignaturePadding( + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_ PCSYMCRYPT_HASH hashAlgorithm, + _In_reads_bytes_opt_( cbSalt ) + PCBYTE pbSalt, + _In_range_(0, cbPSSFormat) SIZE_T cbSalt, + UINT32 nBitsOfModulus, + UINT32 flags, + _Out_writes_bytes_( cbPSSFormat ) + PBYTE pbPSSFormat, + SIZE_T cbPSSFormat, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PVOID pHashState; + + PBYTE pbMPrime; + PBYTE pbDB; + PBYTE pbDBMask; + + SIZE_T cbDB; + SIZE_T cbMPrime; + SIZE_T cbPadding2; + + SIZE_T dwZeroBits = 0; // Number of bits of the leftmost bit to be zeroed + + SIZE_T cbHashAlg = SymCryptHashResultSize( hashAlgorithm ); + SIZE_T cbHashState = SymCryptHashStateSize( hashAlgorithm ); + + UNREFERENCED_PARAMETER( cbScratch ); + + if ((cbPSSFormat == 0) || (pbPSSFormat == NULL)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Corner case of RFC 3447 for PSS: + // If nBitsOfModulus == 1 mod 8, then emBits = nBitsOfModulus - 1 == 0 mod 8 + // Thus the size of the input buffer in bytes is emLen = ceil(emBits /8), + // one smaller than the size of the modulus. Fix this here by setting the + // leftmost byte of the output equal to 0. 
+ if (nBitsOfModulus%8 == 1) + { + pbPSSFormat[0] = 0; + pbPSSFormat++; + cbPSSFormat--; + } + + if ((flags!=0) || + (cbPSSFormat < (cbHashAlg + cbSalt + 2)) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbDB = cbPSSFormat - (cbHashAlg + 1); + cbPadding2 = cbDB - cbSalt - 1; + cbMPrime = 8 + cbHash + cbSalt; + + SYMCRYPT_ASSERT( cbScratch >= cbHashState + cbMPrime + (cbDB * 2) ); + + pHashState = (PVOID) pbScratch; + pbMPrime = pbScratch + cbHashState; + pbDB = pbMPrime + cbMPrime; + pbDBMask = pbDB + cbDB; + + // set up the M Prime + SymCryptWipe(pbMPrime, 8); + memcpy(pbMPrime + 8, pbHash, cbHash); + + if (NULL == pbSalt) + { + // generate the random salt + scError = SymCryptCallbackRandom( + pbMPrime + 8 + cbHash, + cbSalt); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + else + { + // copy the salt passed + memcpy(pbMPrime + 8 + cbHash, pbSalt, cbSalt); + } + + // hash the MPrime + SymCryptHash( hashAlgorithm, pbMPrime, cbMPrime, pbPSSFormat + cbDB, cbHashAlg ); + + // copy the same salt into the DB + SymCryptWipe(pbDB, cbPadding2); + pbDB[cbPadding2] = 0x01; + memcpy(pbDB + cbPadding2 + 1, pbMPrime + 8 + cbHash, cbSalt); + + // MGF(Hash of MPrime) + SymCryptRsaPaddingMaskGeneration( + hashAlgorithm, + pHashState, + pbPSSFormat + cbDB, + cbHashAlg, + pbDBMask, + cbDB); + + // XOR the DB and the mask MGF(seed) + for (UINT32 i = 0; i < cbDB; i++) + { + pbPSSFormat[i] = pbDB[i] ^ pbDBMask[i]; + } + + // calculate the number of bits to be zeroed + dwZeroBits = 8*cbPSSFormat + 1 - nBitsOfModulus; + + // mask off dwZeroBits worth of the encoded message + pbPSSFormat[0] &= (BYTE)(0xff >> dwZeroBits); + + // set the least significant byte of pbPSSFormat to bc + pbPSSFormat[cbPSSFormat - 1] = 0xbc; + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaPssVerifySignaturePadding( + _In_reads_bytes_( cbHash ) PCBYTE pbHash, + SIZE_T cbHash, + _In_ PCSYMCRYPT_HASH 
hashAlgorithm, + _In_range_(0, cbPSSFormat) SIZE_T cbSalt, + _In_reads_bytes_( cbPSSFormat ) + PCBYTE pbPSSFormat, + SIZE_T cbPSSFormat, + UINT32 nBitsOfModulus, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + PVOID pHashState; + + PBYTE pbDBMask; + PBYTE pbMPrime; + PBYTE pbMPrimeHash; + PCBYTE pbHashOfMPrimeIndex; + + SIZE_T cbDB; + SIZE_T cbMPrime; + SIZE_T cbPadding2; + SIZE_T cbSaltObserved; + + SIZE_T dwZeroBits = 0; // Number of bits of the leftmost bit to be zeroed + + SIZE_T cbHashAlg = SymCryptHashResultSize( hashAlgorithm ); + SIZE_T cbHashState = SymCryptHashStateSize( hashAlgorithm ); + + UNREFERENCED_PARAMETER( cbScratch ); + + if (((flags & ~SYMCRYPT_FLAG_RSA_PSS_VERIFY_WITH_MINIMUM_SALT) != 0) || + (cbPSSFormat == 0) || + (pbPSSFormat == NULL)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Corner case of RFC 3447 for PSS: + // If nBitsOfModulus == 1 mod 8, then emBits = nBitsOfModulus - 1 == 0 mod 8 + // Thus the size of the input buffer in bytes is emLen = ceil(emBits /8), + // one smaller than the size of the modulus. Fix this here by checking that the + // leftmost byte of the input equals 0. 
+ if (nBitsOfModulus%8 == 1) + { + if (pbPSSFormat[0] != 0) + { + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + goto cleanup; + } + pbPSSFormat++; + cbPSSFormat--; + } + + // calculate the number of bits to be zeroed + dwZeroBits = 8*cbPSSFormat + 1 - nBitsOfModulus; + + // check the most significant dwZeroBits bits to ensure they're zero and + // check the least significant byte + if( (cbPSSFormat < (cbHashAlg + cbSalt + 2)) || + (pbPSSFormat[0] & (BYTE)(0xff << (8 - dwZeroBits))) != 0 || + pbPSSFormat[cbPSSFormat - 1] != 0xbc + ) + { + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + goto cleanup; + } + + cbDB = cbPSSFormat - (cbHashAlg + 1); + + pHashState = (PVOID) pbScratch; + pbDBMask = pbScratch + cbHashState; + + // index to hash of M Prime + pbHashOfMPrimeIndex = pbPSSFormat + (cbPSSFormat - (cbHashAlg + 1)); + + // MGF(masked DB) + SymCryptRsaPaddingMaskGeneration( + hashAlgorithm, + pHashState, + pbHashOfMPrimeIndex, + cbHashAlg, + pbDBMask, + cbDB); + + // XOR the DB and the DB mask and store the result in pbDBMask (not needed after this) + for (UINT32 i = 0; i < cbDB; i++) + { + pbDBMask[i] = pbPSSFormat[i] ^ pbDBMask[i]; + } + + // mask off the first dwZeroBits + pbDBMask[0] &= (BYTE)(0xff >> dwZeroBits); + + // find the length of the all-zeroes padding2 in pbDBMask + // padding2 must be terminated by a 0x01 byte + for (cbPadding2 = 0; cbPadding2 < (cbDB - cbSalt); cbPadding2++) + { + if (pbDBMask[cbPadding2] == 0x01) + { + // we have reached the end of padding2 + break; + } + + if (pbDBMask[cbPadding2] != 0x00) + { + // non-zero byte in what should be padding2 + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + goto cleanup; + } + } + + // Here we have either: + // cbPadding2 == cbDB - cbSalt, which means the padding is too long + // or + // cbPadding2 <= cbDB - cbSalt - 1, and we have broken out of the loop when we found the 0x01 byte + if( cbPadding2 == cbDB - cbSalt ) + { + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + goto 
cleanup; + } + + cbSaltObserved = cbDB - cbPadding2 - 1; + // cbSalt <= cbDB - cbPadding2 - 1 = cbSaltObserved + // so cbSaltObserved is acceptable value for signature verification + // with SYMCRYPT_FLAG_RSA_PSS_VERIFY_WITH_MINIMUM_SALT + + if( ((flags & SYMCRYPT_FLAG_RSA_PSS_VERIFY_WITH_MINIMUM_SALT) == 0) && + cbSaltObserved != cbSalt ) + { + // When SYMCRYPT_FLAG_RSA_PSS_VERIFY_WITH_MINIMUM_SALT not specified, + // we require salt length observed to exactly match the caller provided salt length + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + goto cleanup; + } + + pbMPrime = pbDBMask + cbDB; + cbMPrime = 8 + cbHash + cbSaltObserved; + pbMPrimeHash = pbMPrime + cbMPrime; + + SYMCRYPT_ASSERT( cbScratch >= cbHashState + cbDB + cbMPrime + cbHashAlg ); + + // create the M Prime + SymCryptWipe(pbMPrime, 8); + memcpy(pbMPrime + 8, pbHash, cbHash); + memcpy(pbMPrime + 8 + cbHash, + pbDBMask + (cbDB - cbSaltObserved), + cbSaltObserved); + + // hash the M Prime + SymCryptHash( hashAlgorithm, pbMPrime, cbMPrime, pbMPrimeHash, cbHashAlg ); + + if ( !SymCryptEqual(pbPSSFormat + cbDB, pbMPrimeHash, cbHashAlg) ) + { + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + goto cleanup; + } + + scError = SYMCRYPT_NO_ERROR; + +cleanup: + return scError; +} diff --git a/libs/symcrypt/lib/rsakey.c b/libs/symcrypt/lib/rsakey.c new file mode 100644 index 00000000000..32b802ba7e6 --- /dev/null +++ b/libs/symcrypt/lib/rsakey.c @@ -0,0 +1,1631 @@ +// +// rsakey.c RSA keys' related algorithms +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#include "precomp.h" + +#define RSA_DEFAULT_PUBLIC_EXPONENT (65537) + +PSYMCRYPT_RSAKEY +SYMCRYPT_CALL +SymCryptRsakeyAllocate( + _In_ PCSYMCRYPT_RSA_PARAMS pParams, + _In_ UINT32 flags ) +{ + PVOID p; + SIZE_T cb; + PSYMCRYPT_RSAKEY res = NULL; + + UNREFERENCED_PARAMETER( flags ); + + SYMCRYPT_ASSERT( pParams != NULL ); + + cb = SymCryptSizeofRsakeyFromParams( pParams ); + + p = SymCryptCallbackAlloc( cb ); + + if ( p==NULL ) + { + goto cleanup; + } + + res = SymCryptRsakeyCreate( p, cb, pParams ); + +cleanup: + return res; +} + +VOID +SYMCRYPT_CALL +SymCryptRsakeyFree( _Out_ PSYMCRYPT_RSAKEY pkObj ) +{ + SYMCRYPT_CHECK_MAGIC( pkObj ); + SymCryptRsakeyWipe( pkObj ); + SymCryptCallbackFree( pkObj ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptSizeofRsakeyFromParams( _In_ PCSYMCRYPT_RSA_PARAMS pParams ) +{ + UINT32 nModulusDigits; + UINT32 res; + + SYMCRYPT_ASSERT( pParams != NULL ); + + nModulusDigits = SymCryptDigitsFromBits( pParams->nBitsOfModulus ); + + // + // From symcrypt_internal.h we have: + // - sizeof results are upper bounded by 2^19 + // - SYMCRYPT_SCRATCH_BYTES results are upper bounded by 2^27 (including RSA and ECURVE) + // - nPrimes and nPubExps are bounded by SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES = 2 and + // SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS = 1 + // Thus the following calculation does not overflow the result. 
+ // + res = sizeof(SYMCRYPT_RSAKEY) + + SymCryptSizeofModulusFromDigits( nModulusDigits ) + // For Modulus + pParams->nPrimes * SymCryptSizeofModulusFromDigits( nModulusDigits ) + // For Primes + pParams->nPrimes * SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( pParams->nBitsOfModulus ) + // For CrtInverses + pParams->nPubExp * SymCryptSizeofIntFromDigits( nModulusDigits ) + // For PrivExps + pParams->nPubExp * pParams->nPrimes * SymCryptSizeofIntFromDigits( nModulusDigits ); // For CrtPrivExps + + // Consistency check with the static macro (optimized away in production) + SYMCRYPT_ASSERT( res <= SYMCRYPT_SIZEOF_RSAKEY_FROM_PARAMS( pParams->nBitsOfModulus, pParams->nPrimes, pParams->nPubExp ) ); + + return res; +} + +PSYMCRYPT_RSAKEY +SYMCRYPT_CALL +SymCryptRsakeyCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + _In_ PCSYMCRYPT_RSA_PARAMS pParams ) +{ + PSYMCRYPT_RSAKEY pkObj = NULL; + + PBYTE pbCurr = pbBuffer; + SIZE_T cbNeeded; + SIZE_T itemSize; + + SYMCRYPT_ASSERT( pParams != NULL ); + + cbNeeded = SymCryptSizeofRsakeyFromParams( pParams ); + + SYMCRYPT_ASSERT_ASYM_ALIGNED( pbCurr ); + + if (( cbBuffer < cbNeeded ) || + ( pParams->nBitsOfModulus < SYMCRYPT_RSAKEY_MIN_BITSIZE_MODULUS ) || + ( pParams->nBitsOfModulus > SYMCRYPT_RSAKEY_MAX_BITSIZE_MODULUS ) || + ( pParams->nPubExp < 1 ) || + ( pParams->nPubExp > SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS ) || + ( pParams->nPrimes == 1 ) || + ( pParams->nPrimes > SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES ) ) + { + goto cleanup; + } + SYMCRYPT_ASSERT( cbBuffer >= sizeof( SYMCRYPT_RSAKEY ) ); + + pkObj = (PSYMCRYPT_RSAKEY) pbCurr; + + // Set all the parameters to 0 + SymCryptWipe( pbBuffer, cbBuffer ); + + // Main parameters of the RSAKEY + // Everything is 0 until created + + pkObj->cbTotalSize = (UINT32) cbNeeded; + // The result should always be within 4 GB, but we check to avoid security bugs + SYMCRYPT_ASSERT( pkObj->cbTotalSize == cbNeeded ); + + pkObj->hasPrivateKey = FALSE; + + pkObj->nSetBitsOfModulus = 
pParams->nBitsOfModulus; + pkObj->nDigitsOfModulus = SymCryptDigitsFromBits( pkObj->nSetBitsOfModulus ); // The modulus object has always this number of digits + + pkObj->nPrimes = pParams->nPrimes; + pkObj->nPubExp = pParams->nPubExp; + + pbCurr += sizeof( SYMCRYPT_RSAKEY ); + + // Modulus + itemSize = SymCryptSizeofModulusFromDigits( pkObj->nDigitsOfModulus ); + SYMCRYPT_ASSERT( cbBuffer >= sizeof( SYMCRYPT_RSAKEY ) + itemSize + + (pkObj->nPrimes*SymCryptSizeofModulusFromDigits( pkObj->nDigitsOfModulus )) + + (pkObj->nPrimes*SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( pParams->nBitsOfModulus )) + + (pkObj->nPubExp*SymCryptSizeofIntFromDigits( pkObj->nDigitsOfModulus )) + + (pkObj->nPubExp*pkObj->nPrimes*SymCryptSizeofIntFromDigits( pkObj->nDigitsOfModulus )) ); + pkObj->pmModulus = SymCryptModulusCreate( + pbCurr, + itemSize, + pkObj->nDigitsOfModulus ); + SYMCRYPT_ASSERT( pkObj->pmModulus != NULL ); + pbCurr += itemSize; + + // For the remaining objects + // defer creation until SymCryptRsakeyGenerate or + // SymCryptRsakeySetValue + + // Primes + for (UINT32 i=0; i<pkObj->nPrimes; i++) + { + pkObj->pbPrimes[i] = pbCurr; + pbCurr += SymCryptSizeofModulusFromDigits( pkObj->nDigitsOfModulus ); + } + + // CRT Inverses of primes + for (UINT32 i=0; i<pkObj->nPrimes; i++) + { + pkObj->pbCrtInverses[i] = pbCurr; + pbCurr += SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( pParams->nBitsOfModulus ); + } + + // Private exponents + for (UINT32 i=0; i<pkObj->nPubExp; i++) + { + pkObj->pbPrivExps[i] = pbCurr; + pbCurr += SymCryptSizeofIntFromDigits( pkObj->nDigitsOfModulus ); + } + + // Private exponents modulo each prime (minus 1) + for (UINT32 i=0; i<pkObj->nPubExp*pkObj->nPrimes; i++) + { + pkObj->pbCrtPrivExps[i] = pbCurr; + pbCurr += SymCryptSizeofIntFromDigits( pkObj->nDigitsOfModulus ); + } + + // Setting the magic + SYMCRYPT_SET_MAGIC( pkObj ); + +cleanup: + return pkObj; +} + +VOID +SYMCRYPT_CALL +SymCryptRsakeyWipe( _Out_ PSYMCRYPT_RSAKEY pkDst ) +{ + // Wipe the whole structure 
in one go. + SymCryptWipe( pkDst, pkDst->cbTotalSize ); +} + +#if 0 +VOID +SYMCRYPT_CALL +SymCryptRsakeyCopy( + _In_ PCSYMCRYPT_RSAKEY pkSrc, + _Out_ PSYMCRYPT_RSAKEY pkDst ) +{ + SymCryptFatal( 'rsac' ); + // This function doesn't work correctly because subobjects might + // not have been created yet. + // Future: fix this + + // + // in-place copy is somewhat common... + // + if( pkSrc != pkDst ) + { + pkDst->fAlgorithmInfo = pkSrc->fAlgorithmInfo; + pkDst->cbTotalSize = pkSrc->cbTotalSize; + pkDst->hasPrivateKey = pkSrc->hasPrivateKey; + pkDst->nSetBitsOfModulus = pkSrc->nSetBitsOfModulus; + + pkDst->nBitsOfModulus = pkSrc->nBitsOfModulus; + pkDst->nDigitsOfModulus = pkSrc->nDigitsOfModulus; + + pkDst->nPubExp = pkSrc->nPubExp; + for (UINT32 i=0; i<SYMCRYPT_RSAKEY_MAX_NUMOF_PUBEXPS; i++) + { + pkDst->au64PubExp[i] = pkSrc->au64PubExp[i]; + } + + pkDst->nPrimes = pkSrc->nPrimes; + for (UINT32 i=0; i<SYMCRYPT_RSAKEY_MAX_NUMOF_PRIMES; i++) + { + pkDst->nBitsOfPrimes[i] = pkSrc->nBitsOfPrimes[i]; + pkDst->nDigitsOfPrimes[i] = pkSrc->nDigitsOfPrimes[i]; + } + + // Copy the objects + SymCryptModulusCopy( pkSrc->pmModulus, pkDst->pmModulus ); + + for (UINT32 i=0; i< pkSrc->nPrimes; i++) + { + SymCryptModulusCopy( pkSrc->pmPrimes[i], pkDst->pmPrimes[i] ); + SymCryptModElementCopy( pkSrc->pmPrimes[i], pkSrc->peCrtInverses[i], pkDst->peCrtInverses[i] ); + } + + for (UINT32 i=0; i< pkSrc->nPubExp; i++) + { + SymCryptIntCopy( pkSrc->piPrivExps[i], pkDst->piPrivExps[i] ); + } + + for (UINT32 i=0; i< pkSrc->nPubExp*pkSrc->nPrimes; i++) + { + SymCryptIntCopy( pkSrc->piCrtPrivExps[i], pkDst->piCrtPrivExps[i] ); + } + } +} +#endif + +BOOLEAN +SYMCRYPT_CALL +SymCryptRsakeyHasPrivateKey( _In_ PCSYMCRYPT_RSAKEY pkRsakey ) +{ + return pkRsakey->hasPrivateKey; +} + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeySizeofModulus( _In_ PCSYMCRYPT_RSAKEY pkRsakey ) +{ + return (pkRsakey->nBitsOfModulus + 7)/8; +} + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeyModulusBits( _In_ PCSYMCRYPT_RSAKEY pkRsakey ) 
+{ + return pkRsakey->nBitsOfModulus; +} + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeySizeofPublicExponent( + _In_ PCSYMCRYPT_RSAKEY pRsakey, + UINT32 index ) +{ + SYMCRYPT_ASSERT( index == 0 ); + UNREFERENCED_PARAMETER( index ); + return SymCryptUint64Bytesize( pRsakey->au64PubExp[0] ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeySizeofPrime( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + UINT32 index ) +{ + return (pkRsakey->nBitsOfPrimes[index] + 7)/8; +} + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeyGetNumberOfPublicExponents( _In_ PCSYMCRYPT_RSAKEY pkRsakey ) +{ + return pkRsakey->nPubExp; +} + +UINT32 +SYMCRYPT_CALL +SymCryptRsakeyGetNumberOfPrimes( _In_ PCSYMCRYPT_RSAKEY pkRsakey ) +{ + return pkRsakey->nPrimes; +} + +VOID +SYMCRYPT_CALL +SymCryptRsakeyCreateAllObjects( _Inout_ PSYMCRYPT_RSAKEY pkRsakey ) +{ + // Primes + for (UINT32 i=0; i<pkRsakey->nPrimes; i++) + { + pkRsakey->pmPrimes[i] = SymCryptModulusCreate( + pkRsakey->pbPrimes[i], + SymCryptSizeofModulusFromDigits( pkRsakey->nDigitsOfPrimes[i] ), + pkRsakey->nDigitsOfPrimes[i] ); + SYMCRYPT_ASSERT( pkRsakey->pmPrimes[i] != NULL ); + } + + // CRT Inverses of primes + for (UINT32 i=0; i<pkRsakey->nPrimes; i++) + { + pkRsakey->peCrtInverses[i] = SymCryptModElementCreate( + pkRsakey->pbCrtInverses[i], + SymCryptSizeofModElementFromModulus( pkRsakey->pmPrimes[i] ), + pkRsakey->pmPrimes[i] ); + SYMCRYPT_ASSERT( pkRsakey->peCrtInverses[i] != NULL ); + } + + // Private exponents + for( UINT32 i=0; i<pkRsakey->nPubExp; i++ ) + { + pkRsakey->piPrivExps[i] = SymCryptIntCreate( + pkRsakey->pbPrivExps[i], + SymCryptSizeofIntFromDigits( pkRsakey->nDigitsOfModulus ), + pkRsakey->nDigitsOfModulus ); + SYMCRYPT_ASSERT( pkRsakey->piPrivExps[i] != NULL ); + } + + // Private exponents modulo each prime (minus 1) + for (UINT32 i=0; i<pkRsakey->nPubExp*pkRsakey->nPrimes; i++) + { + pkRsakey->piCrtPrivExps[i] = SymCryptIntCreate( + pkRsakey->pbCrtPrivExps[i], + SymCryptSizeofIntFromDigits( pkRsakey->nDigitsOfPrimes[i] ), + 
pkRsakey->nDigitsOfPrimes[i] ); + SYMCRYPT_ASSERT( pkRsakey->piCrtPrivExps[i] != NULL ); + } +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyCalculatePrivateFields( + _Inout_ PSYMCRYPT_RSAKEY pkRsakey, + _Out_ PSYMCRYPT_DIVISOR pdTmp, // Temporary of nMaxDigitsOfPrimes + _Out_ PSYMCRYPT_INT piPhi, // Temporary of nDigitsOfModulus + _Out_ PSYMCRYPT_INT piAcc, // Temporary of nMaxDigitsOfPrimes + nDigitsOfModulus + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch, + UINT32 flags +) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + BYTE tmpGcdBuf[ SYMCRYPT_SIZEOF_INT_FROM_BITS( 64 ) + SYMCRYPT_ASYM_ALIGN_VALUE]; + PSYMCRYPT_INT piTmpGcd; + + // Use pdTmp as int scratch + PSYMCRYPT_INT piScr = SymCryptIntFromDivisor(pdTmp); + + UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION; + + if ( ( flags & ~allowedFlags ) != 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // We need a 1-digit tmp value to store the GCD in. + // Simpler to put it on the stack than to add full scratch size computation support to this function + piTmpGcd = SymCryptIntCreate( SYMCRYPT_ASYM_ALIGN_UP( tmpGcdBuf ), sizeof( tmpGcdBuf ) - SYMCRYPT_ASYM_ALIGN_VALUE, SymCryptDigitsFromBits( 64 ) ); + + // Run the CRT generation + scError = SymCryptCrtGenerateInverses( pkRsakey->nPrimes, pkRsakey->pmPrimes, 0, pkRsakey->peCrtInverses, pbScratch, cbScratch); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Calculate Phi + SymCryptIntSetValueUint32( 1, piPhi ); + for (UINT32 i=0; i<pkRsakey->nPrimes; i++) + { + // piScr can have a different number of digits than each prime + scError = SymCryptIntCopyMixedSize( SymCryptIntFromModulus( pkRsakey->pmPrimes[i] ), piScr ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + SymCryptIntSubUint32( piScr, 1, piScr ); // p-1 + SymCryptIntMulMixedSize( piScr, piPhi, piAcc, pbScratch, cbScratch ); + scError = SymCryptIntCopyMixedSize( piAcc, piPhi ); + if (scError!=SYMCRYPT_NO_ERROR) + { + 
goto cleanup; + } + } + + // Calculate the private exponents + for (UINT32 i=0; i<pkRsakey->nPubExp; i++) + { + // IntExtendedGcd requirements: + // - First argument > 0: piPhi as the product of p-1's + // - Second argument: odd, verified below + // We also reject public exponent 1, as that is obviously unsafe. + if( pkRsakey->au64PubExp[i] == 1 || (pkRsakey->au64PubExp[i] & 1) != 1) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Load the public exponent E into piScr + SymCryptIntSetValueUint64( pkRsakey->au64PubExp[i], piScr ); + + // Calculate D (stored in piPrivExps[i]) from Phi and E via extended GCD + SymCryptIntExtendedGcd( + piPhi, + piScr, + SYMCRYPT_FLAG_GCD_INPUTS_NOT_BOTH_EVEN, + piTmpGcd, // Gcd + NULL, // Lcm + NULL, // InvSrc1ModSrc2 + pkRsakey->piPrivExps[i], + pbScratch, + cbScratch); + + if( !SymCryptIntIsEqualUint32( piTmpGcd, 1 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + + // Calculate the private exponents modulo each prime minus 1 + for (UINT32 i=0; i<pkRsakey->nPrimes; i++) + { + scError = SymCryptIntCopyMixedSize( SymCryptIntFromModulus(pkRsakey->pmPrimes[i]), SymCryptIntFromDivisor(pdTmp) ); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // IntToDivisor requirement: + // Each prime has at least SYMCRYPT_RSAKEY_MIN_BITSIZE_PRIME bits --> P-1 > 0 + SymCryptIntSubUint32( SymCryptIntFromDivisor(pdTmp), 1, SymCryptIntFromDivisor(pdTmp) ); + SymCryptIntToDivisor( + SymCryptIntFromDivisor(pdTmp), + pdTmp, + pkRsakey->nPubExp, + 0, + pbScratch, + cbScratch ); + + for (UINT32 j=0; j<pkRsakey->nPubExp; j++) + { + SymCryptIntDivMod( + pkRsakey->piPrivExps[j], + pdTmp, + NULL, + piPhi, // Remainder goes into piPhi (reused as a temporary) as each private exponent might have a different size + pbScratch, + cbScratch ); + + scError = SymCryptIntCopyMixedSize( piPhi, pkRsakey->piCrtPrivExps[ j*pkRsakey->nPrimes + i ]); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + } + + // Check that the product of the primes is in fact the modulus + if( (flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION) == 0 ) + { 
+ if( pkRsakey->nPrimes != 2 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptIntMulMixedSize( + SymCryptIntFromModulus(pkRsakey->pmPrimes[0]), + SymCryptIntFromModulus(pkRsakey->pmPrimes[1]), + piAcc, + pbScratch, cbScratch ); + + if( !SymCryptIntIsEqual( piAcc, SymCryptIntFromModulus( pkRsakey->pmModulus ) ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + } + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyGenerate( + _Inout_ PSYMCRYPT_RSAKEY pkRsakey, + _In_reads_opt_( nPubExp ) PCUINT64 pu64PubExp, + UINT32 nPubExp, + _In_ UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + // 3 sizes of temporary elements: + // - ndPrimes = number of digit size of each prime (we choose it to be the same for all primes) + // - ndMod = pkRsakey->nDigitsOfModulus + // - ndLarge = ndPrimes + ndMod + + UINT32 ndPrimes = 0; + + UINT32 cbPrimes = 0; + PSYMCRYPT_INT piLow = NULL; + PSYMCRYPT_INT piHigh = NULL; + + UINT32 cbDivisor = 0; + PSYMCRYPT_DIVISOR pdTmp = NULL; + + UINT32 ndMod = pkRsakey->nDigitsOfModulus; + UINT32 cbMod = 0; + PSYMCRYPT_INT piPhi = NULL; + + UINT32 ndLarge = 0; + UINT32 cbLarge = 0; + PSYMCRYPT_INT piAcc = NULL; + + PBYTE pbScratch = NULL; + UINT32 cbScratch = 0; + PBYTE pbFnScratch = NULL; + UINT32 cbFnScratch = 0; + + UINT32 maxTries = 0; // For the prime generation (and the modulus operations ?) 
+ UINT32 primeBits = 0; + + const UINT64 defaultExponent = RSA_DEFAULT_PUBLIC_EXPONENT; + + // Ensure caller has specified what algorithm(s) the key will be used with + UINT32 algorithmFlags = SYMCRYPT_FLAG_RSAKEY_SIGN | SYMCRYPT_FLAG_RSAKEY_ENCRYPT; + // Ensure only allowed flags are specified + UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_NO_FIPS | algorithmFlags; + + if ( ( ( flags & ~allowedFlags ) != 0 ) || + ( ( flags & algorithmFlags ) == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // SymCryptRsaSignVerifyPct requires the generated key to be at least 496 bits to avoid fatal + // Require caller to specify NO_FIPS for up to 1024 bits as running FIPS tests on too-small keys + // does not make it FIPS certifiable and gives the wrong impression to callers + if ( ( (flags & SYMCRYPT_FLAG_KEY_NO_FIPS) == 0 ) && + ( pkRsakey->nSetBitsOfModulus < SYMCRYPT_RSAKEY_FIPS_MIN_BITSIZE_MODULUS ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Handle the default exponent case + if( pu64PubExp == NULL && nPubExp == 0 ) + { + pu64PubExp = &defaultExponent; + nPubExp = 1; + } + + // Make sure we have: + // - exactly 2 primes + // - the right number of public exponents + // - exactly 1 public exponent + if (pkRsakey->nPrimes != 2 || nPubExp != pkRsakey->nPubExp || nPubExp != 1 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Copy the public exponent into the key + pkRsakey->au64PubExp[0] = pu64PubExp[0]; + + // Before doing anything calculate all the needed sizes + // The size limits were checked in SymCryptRsakeyCreate which is the only way to create an Rsakey object. 
+ pkRsakey->nBitsOfModulus = pkRsakey->nSetBitsOfModulus; // This will be the exact bit size of our modulus + + pkRsakey->nBitsOfPrimes[0] = (pkRsakey->nBitsOfModulus + 1)/2; + pkRsakey->nBitsOfPrimes[1] = pkRsakey->nBitsOfModulus/2; // The second prime is one bit smaller for odd-length moduli + + pkRsakey->nDigitsOfPrimes[0] = SymCryptDigitsFromBits(pkRsakey->nBitsOfPrimes[0]); + pkRsakey->nDigitsOfPrimes[1] = SymCryptDigitsFromBits(pkRsakey->nBitsOfPrimes[1]); + + pkRsakey->nMaxDigitsOfPrimes = SYMCRYPT_MAX(pkRsakey->nDigitsOfPrimes[0], pkRsakey->nDigitsOfPrimes[1]); + + ndPrimes = pkRsakey->nMaxDigitsOfPrimes; + ndLarge = ndPrimes + ndMod; + + primeBits = SYMCRYPT_MAX(pkRsakey->nBitsOfPrimes[0],pkRsakey->nBitsOfPrimes[1]); + maxTries = 100 * primeBits; + + // Create all the SymCryptObjects + SymCryptRsakeyCreateAllObjects( pkRsakey ); + + // Allocate the temp integers and the scratch space + // All sizes are limited by the modulus sizes verified in SymCryptRsakeyCreate + cbPrimes = SymCryptSizeofIntFromDigits( ndPrimes ); + cbMod = SymCryptSizeofIntFromDigits( ndMod ); + cbLarge = SymCryptSizeofIntFromDigits( ndLarge ); + cbDivisor = SymCryptSizeofDivisorFromDigits( ndPrimes ); + + cbScratch = 2*cbPrimes + cbMod + cbLarge + cbDivisor + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_PRIME_GEN(ndPrimes), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS(ndMod), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_MUL(ndMod), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_CRT_GENERATION(ndPrimes), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_EXTENDED_GCD(ndMod), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_DIVISOR(ndPrimes), + SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( ndMod, ndPrimes ) + )))))); + + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + pbFnScratch = pbScratch; + cbFnScratch = cbScratch; + + // Create temporaries + // dcl - this would be easier to 
review with one statement per line + piLow = SymCryptIntCreate( pbFnScratch, cbPrimes, ndPrimes ); pbFnScratch += cbPrimes; cbFnScratch -= cbPrimes; + piHigh = SymCryptIntCreate( pbFnScratch, cbPrimes, ndPrimes ); pbFnScratch += cbPrimes; cbFnScratch -= cbPrimes; + + piPhi = SymCryptIntCreate( pbFnScratch, cbMod, ndMod ); pbFnScratch += cbMod; cbFnScratch -= cbMod; + + piAcc = SymCryptIntCreate( pbFnScratch, cbLarge, ndLarge ); pbFnScratch += cbLarge; cbFnScratch -= cbLarge; + + pdTmp = SymCryptDivisorCreate( pbFnScratch, cbDivisor, ndPrimes ); pbFnScratch += cbDivisor; cbFnScratch -= cbDivisor; + + // ***Prime generation limits*** + // + // If nBitsOfModulus is even (main case) + // Low limit = 2^{primeBits-1} + 2^{primeBits - 2} + // High limit = 2^primeBits - 1 + // + // If nBitsOfModulus is odd we use different + // limits for the two primes (until we have an integer sqrt function) + // + // For the first + // Low limit = 2^{primeBits-1} + 2^{primeBits - 2} + // High limit = 2^primeBits - 1 + // For the second + // Low limit = 2^{primeBits-2} + 2^{primeBits - 3} + // High limit = 2^{primeBits-1} - 1 + // + // Notice that nBitsOfModulus is a public value. + // + // *** TODO: This works only for 2 primes to give modulus + // of exactly nBitsOfModulus bits. + + SymCryptIntSetValueUint32( 3, piLow ); + SymCryptIntMulPow2( piLow, primeBits - 2, piLow ); + + SymCryptIntSetValueUint32( 1, piHigh ); + SymCryptIntMulPow2( piHigh, primeBits, piHigh ); + SymCryptIntSubUint32( piHigh, 1, piHigh ); + + // Generate primes and at the same time accumulate their product into piPhi + SymCryptIntSetValueUint32( 1, piPhi ); + for (UINT32 i=0; i<pkRsakey->nPrimes; i++) + { + if ( ((pkRsakey->nBitsOfModulus % 2)==1) && (i>0) ) + { + SymCryptIntDivPow2( piLow, 1, piLow ); + SymCryptIntDivPow2( piHigh, 1, piHigh ); + } + + // IntGenerateRandomPrime requirement: + // piLow > 3 since nBitsOfModulus is bounded by + // SYMCRYPT_RSAKEY_MIN_BITSIZE_MODULUS. 
+ scError = SymCryptIntGenerateRandomPrime( + piLow, + piHigh, + pu64PubExp, + nPubExp, + maxTries, + 0, + SymCryptIntFromModulus( pkRsakey->pmPrimes[i] ), + pbFnScratch, + cbFnScratch); + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // IntToModulus requirement: + // piLow > 0 --> pkRsakey->pmPrimes[i] > 0 + SymCryptIntToModulus( + SymCryptIntFromModulus( pkRsakey->pmPrimes[i] ), + pkRsakey->pmPrimes[i], + pkRsakey->nBitsOfModulus, // Average number of operations + SYMCRYPT_FLAG_MODULUS_PARITY_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbFnScratch, + cbFnScratch ); + + SymCryptIntMulMixedSize( SymCryptIntFromModulus( pkRsakey->pmPrimes[i] ), piPhi, piAcc, pbFnScratch, cbFnScratch ); // P_i * Product + scError = SymCryptIntCopyMixedSize( piAcc, piPhi ); // Move the result to piPhi + if (scError!=SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + + // IntToModulus requirement: + // piPhi product of non-zero primes --> piPhi > 0 + SymCryptIntCopy( piPhi, SymCryptIntFromModulus( pkRsakey->pmModulus ) ); + SymCryptIntToModulus( + SymCryptIntFromModulus( pkRsakey->pmModulus ), + pkRsakey->pmModulus, + pkRsakey->nBitsOfModulus, // Average number of operations + SYMCRYPT_FLAG_DATA_PUBLIC, + pbFnScratch, + cbFnScratch ); + + if ( SymCryptIntBitsizeOfValue( piPhi ) != pkRsakey->nBitsOfModulus) + { + scError = SYMCRYPT_EXTERNAL_FAILURE; // This should never happen (make it assert) + goto cleanup; + } + + // Calculate the rest of the fields + scError = SymCryptRsakeyCalculatePrivateFields( pkRsakey, pdTmp, piPhi, piAcc, pbFnScratch, cbFnScratch, 0 ); + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + pkRsakey->hasPrivateKey = TRUE; + + pkRsakey->fAlgorithmInfo = flags; // We want to track all of the flags in the Rsakey + + if ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) + { + // Ensure RSA algorithm selftest is run before first use of RSA algorithm + // Per FIPS 140-3 IG, this selftest cannot be a PCT + SYMCRYPT_RUN_SELFTEST_ONCE( + 
SymCryptRsaSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_RSA); + + // Run SignVerify PCT on generated keypair + // Our current understanding is that this PCT is sufficient for both RSA_SIGN and RSA_ENCRYPT + + // Unconditionally set the sign flag to enable SignVerify PCT on encrypt-only keypair + pkRsakey->fAlgorithmInfo |= SYMCRYPT_FLAG_RSAKEY_SIGN; + + SYMCRYPT_RUN_KEY_GEN_PCT( + SymCryptRsaSignVerifyPct, + pkRsakey, + SYMCRYPT_PCT_RSA_SIGN ); + + // Unset the sign flag before returning encrypt-only keypair + if ( ( flags & SYMCRYPT_FLAG_RSAKEY_SIGN ) == 0 ) + { + pkRsakey->fAlgorithmInfo ^= SYMCRYPT_FLAG_RSAKEY_SIGN; + } + } + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +// The maximum number of iterations we use in the probabilistic prime recovery method +// If n, e, d are valid then successful prime recovery for each iteration should +// occur with probability ~1/2; with 100 iterations we fail for a valid private +// exponent with probability ~2^-100 +#define SYMCRYPT_MAX_PRIME_RECOVERY_ITERATIONS (100) + +#define SYMCRYPT_SCRATCH_BYTES_FOR_PRIME_RECOVERY( _ndMod, _ndPubExp, _nBitsMod ) \ + SymCryptSizeofIntFromDigits( _ndMod ) + /* Space for piPrivExp*/ \ + SymCryptSizeofIntFromDigits( _ndPubExp ) + /* Space for piPubExp*/ \ + SymCryptSizeofIntFromDigits( _ndMod + _ndPubExp ) + /* Space for piExpProd*/ \ + (4*SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( _nBitsMod )) + /* Space for peTmpY, peTmpX, peOne, peNegOne */\ + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_MUL( _ndMod + _ndPubExp ), /* Space for SymCryptIntMulMixedSize */ \ + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( _ndMod ), /* Space for other SymCryptMod* */ \ + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_MODEXP( _ndMod ), /* Space for SymCryptModExp */ \ + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_EXTENDED_GCD( _ndMod ), /* Space for SymCryptIntExtendedGcd */ \ + SYMCRYPT_MAX( 
SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS( _ndMod ), /* Space for SymCryptIntToModulus */ \ + SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( _ndMod, _ndMod ) /* Space for SymCryptIntDivMod */ \ + ))))) + +static +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyCalculatePrimesFromPrivateExponent( + _Inout_ PSYMCRYPT_RSAKEY pkRsakey, // must already have modulus and public exponent set + _In_reads_bytes_( cbPrivateExponent ) + PCBYTE pbPrivateExponent, + SIZE_T cbPrivateExponent, + SYMCRYPT_NUMBER_FORMAT numFormat, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + UINT32 cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + // 3 digit sizes of temporary integers: + // - ndMod = pkRsakey->nDigitsOfModulus + // - ndPubExp = digits for a UINT64 public exponent + // - ndExpProd = ndMod + ndPubExp + + UINT32 ndMod = pkRsakey->nDigitsOfModulus; + UINT32 cbMod = SymCryptSizeofIntFromDigits( ndMod ); + + UINT32 ndPubExp = SymCryptDigitsFromBits( 64 ); + UINT32 cbPubExp = SymCryptSizeofIntFromDigits( ndPubExp ); + + UINT32 nBitsExpProd = 0; // we compute this later before use + UINT32 ndExpProd = ndMod + ndPubExp; + UINT32 cbExpProd = SymCryptSizeofIntFromDigits( ndExpProd ); + + UINT32 cbModElement = SYMCRYPT_SIZEOF_MODELEMENT_FROM_BITS( pkRsakey->nBitsOfModulus ); + + PSYMCRYPT_INT piPrivExp = NULL; + PSYMCRYPT_INT piPubExp = NULL; + PSYMCRYPT_INT piExpProd = NULL; + PSYMCRYPT_MODELEMENT peTmpY = NULL; + PSYMCRYPT_MODELEMENT peTmpX = NULL; + PSYMCRYPT_MODELEMENT peTmpPtr = NULL; + PSYMCRYPT_MODELEMENT peOne = NULL; + PSYMCRYPT_MODELEMENT peNegOne = NULL; + + PBYTE pbFnScratch = pbScratch; + UINT32 cbFnScratch = cbScratch; + + UINT64 low64ExpProd = 0; + UINT32 trailingZeros = 0; + + BOOL bFoundNonTrivialRoot = FALSE; + + // + // Recover primes from private exponent using probabilistic prime-factor recovery method + // See SP800-56B rev2 Appendix C.1 and Boneh 1999 + // + SYMCRYPT_ASSERT( pkRsakey->nPrimes == 2 ); + SYMCRYPT_ASSERT( cbScratch >= 
SYMCRYPT_SCRATCH_BYTES_FOR_PRIME_RECOVERY(ndMod, ndPubExp, pkRsakey->nBitsOfModulus) ); + + piPrivExp = SymCryptIntCreate( pbFnScratch, cbMod, ndMod ); + SYMCRYPT_ASSERT( piPrivExp != NULL ); + pbFnScratch += cbMod; + cbFnScratch -= cbMod; + piPubExp = SymCryptIntCreate( pbFnScratch, cbPubExp, ndPubExp ); + SYMCRYPT_ASSERT( piPubExp != NULL ); + pbFnScratch += cbPubExp; + cbFnScratch -= cbPubExp; + piExpProd = SymCryptIntCreate( pbFnScratch, cbExpProd, ndExpProd ); + SYMCRYPT_ASSERT( piExpProd != NULL ); + pbFnScratch += cbExpProd; + cbFnScratch -= cbExpProd; + + peTmpY = SymCryptModElementCreate( pbFnScratch, cbModElement, pkRsakey->pmModulus ); + SYMCRYPT_ASSERT( peTmpY != NULL ); + pbFnScratch += cbModElement; + cbFnScratch -= cbModElement; + peTmpX = SymCryptModElementCreate( pbFnScratch, cbModElement, pkRsakey->pmModulus ); + SYMCRYPT_ASSERT( peTmpX != NULL ); + pbFnScratch += cbModElement; + cbFnScratch -= cbModElement; + peOne = SymCryptModElementCreate( pbFnScratch, cbModElement, pkRsakey->pmModulus ); + SYMCRYPT_ASSERT( peOne != NULL ); + pbFnScratch += cbModElement; + cbFnScratch -= cbModElement; + peNegOne = SymCryptModElementCreate( pbFnScratch, cbModElement, pkRsakey->pmModulus ); + SYMCRYPT_ASSERT( peNegOne != NULL ); + pbFnScratch += cbModElement; + cbFnScratch -= cbModElement; + + // Ensure that modulus is odd - this is required for later SymCryptIntExtendedGcd + if( (SymCryptIntGetValueLsbits32(SymCryptIntFromModulus( pkRsakey->pmModulus ))& 1)==0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Import private exponent + scError = SymCryptIntSetValue(pbPrivateExponent, cbPrivateExponent, numFormat, piPrivExp); + if( scError != SYMCRYPT_NO_ERROR ) + { + // The integer cannot fit the private exponent (SYMCRYPT_BUFFER_TOO_SMALL), + // which happens only if the caller provided a private exponent larger than the public modulus + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Basic range check + if( 
!SymCryptIntIsLessThan(piPrivExp, SymCryptIntFromModulus(pkRsakey->pmModulus)) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Given the range check, we can guarantee to compute + // Private exponent (d) * Public exponent (e) + // in piExpProd without overflow + SymCryptIntSetValueUint64( pkRsakey->au64PubExp[0], piPubExp ); + + // compute upper bound on product bit count based on public data (nBitsOfModulus (public) >= nBitsOfPrivateExponent (private)) + nBitsExpProd = pkRsakey->nBitsOfModulus + SymCryptIntBitsizeOfValue( piPubExp ); + + SymCryptIntMulMixedSize( piPrivExp, piPubExp, piExpProd, pbFnScratch, cbFnScratch ); + + // Ensure d*e is odd + low64ExpProd = SymCryptIntGetValueLsbits64( piExpProd ); + + if( (low64ExpProd & 1) == 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Compute how many trailing zeros there are in m = d*e - 1 + // + // We are variable time w.r.t. the number of trailing (up to 64) zeroes. An attacker using + // sidechannels to determine the number of trailing zeroes of m can glean information about + // the private exponent proportionate to the number of trailing zeroes. As we bound this to + // 64, at most an attacker can theoretically determine 64-bits of an expected 2048-bits of + // private exponent - and they can only do this for 1 in 2^64 keys. + // + // It would be possible to mask the number of trailing zeroes from sidechannels by always + // squaring 64 times in the inner loop below and using masked operations to select out + // any found non-trivial root. We can consider doing this as a hardening measure if this API + // does see a lot of usage, but the expectation is that this method will almost never be + // used and it is just not enough of a leak for an attacker to even try measuring. 
+ trailingZeros = SymCryptCountTrailingZeros64( low64ExpProd-1 ); + + // If there are 64 trailing zeroes then we abort because the prime factor recovery method + // could theoretically leak more than 64-bits of the private exponent. The likelihood of any + // key which has this many trailing zeroes _ever_ having been generated by a legitimate key + // generation process is extremely small given the cost of RSA key generation. This much more + // likely indicates faulty inputs or a hardware fault rather than a legitimate keypair we + // should try to import. + if( trailingZeros == 64 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptIntDivPow2( piExpProd, trailingZeros, piExpProd ); // r = m >> t; piExpProd holds d*e (odd) and t >= 1, so (d*e) >> t == (d*e - 1) >> t + SymCryptModElementSetValueUint32( 1, pkRsakey->pmModulus, peOne, pbFnScratch, cbFnScratch ); + SymCryptModElementSetValueNegUint32( 1, pkRsakey->pmModulus, peNegOne, pbFnScratch, cbFnScratch ); + + for( UINT32 i=0; i<SYMCRYPT_MAX_PRIME_RECOVERY_ITERATIONS; i++ ) + { + // y is random value g in [2,n-2] + SymCryptModSetRandom( pkRsakey->pmModulus, peTmpY, 0, pbFnScratch, cbFnScratch ); + + // ModExp y = g^r in place + // could make this a bit faster and leakier using trailingZeros to reduce nBitsExp, but + // not normally a big performance win + SymCryptModExp( + pkRsakey->pmModulus, + peTmpY, + piExpProd, + nBitsExpProd-1, + 0, + peTmpY, + pbFnScratch, cbFnScratch ); + + // if y == 1 or y == -1, start over (we found a trivial root of 1) + if( SymCryptModElementIsEqual( pkRsakey->pmModulus, peTmpY, peOne ) || + SymCryptModElementIsEqual( pkRsakey->pmModulus, peTmpY, peNegOne ) ) + { + continue; + } + + for( UINT32 j=1; j<=trailingZeros; j++ ) + { + // x = y^2 + SymCryptModSquare( pkRsakey->pmModulus, peTmpY, peTmpX, pbFnScratch, cbFnScratch ); + + // if x == 1 then y is a non-trivial root of 1 (it is not -1 or 1) + if( SymCryptModElementIsEqual( pkRsakey->pmModulus, peTmpX, peOne) ) + { + bFoundNonTrivialRoot = TRUE; + break; + } + + // if x == 
-1, start over + if( SymCryptModElementIsEqual( pkRsakey->pmModulus, peTmpX, peNegOne) ) + { + break; // just break out of inner loop; continues outer loop + } + + // swap x and y + peTmpPtr = peTmpY; + peTmpY = peTmpX; + peTmpX = peTmpPtr; + } + if( bFoundNonTrivialRoot ) + { + break; + } + } + + if( !bFoundNonTrivialRoot ) + { + // we failed to find a non-trivial root of 1, so we cannot recover prime factors + // it is almost certain that this means that the inputs were wrong + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // piPrivExp = y + SymCryptModElementToInt( pkRsakey->pmModulus, peTmpY, piPrivExp, pbFnScratch, cbFnScratch ); + // piPrivExp = y-1 (we know this cannot borrow as y^2 is 1, so y != 0) + SymCryptIntSubUint32( piPrivExp, 1, piPrivExp ); + + // piPrivExp = p0 = GCD(y-1, n) + SymCryptIntExtendedGcd( + piPrivExp, + SymCryptIntFromModulus( pkRsakey->pmModulus ), + SYMCRYPT_FLAG_GCD_INPUTS_NOT_BOTH_EVEN, + piPrivExp, + NULL, + NULL, + NULL, + pbFnScratch, cbFnScratch ); + + // compute the sizes of the primes + pkRsakey->nBitsOfPrimes[0] = SymCryptIntBitsizeOfValue(piPrivExp); + pkRsakey->nBitsOfPrimes[1] = pkRsakey->nBitsOfModulus - pkRsakey->nBitsOfPrimes[0]; + for( UINT32 i=0; i<2; i++ ) + { + pkRsakey->nDigitsOfPrimes[i] = SymCryptDigitsFromBits(pkRsakey->nBitsOfPrimes[i]); + if( pkRsakey->nBitsOfPrimes[i] < SYMCRYPT_RSAKEY_MIN_BITSIZE_PRIME ) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + } + pkRsakey->nMaxDigitsOfPrimes = SYMCRYPT_MAX(pkRsakey->nDigitsOfPrimes[0], pkRsakey->nDigitsOfPrimes[1]); + + // Create all the objects + SymCryptRsakeyCreateAllObjects(pkRsakey); + + scError = SymCryptIntCopyMixedSize( piPrivExp, SymCryptIntFromModulus( pkRsakey->pmPrimes[0] ) ); + if( scError != SYMCRYPT_NO_ERROR ) + { + // only fails if we computed the wrong bit-size for the primes above + scError = SYMCRYPT_HARDWARE_FAILURE; + goto cleanup; + } + + SymCryptIntToModulus( SymCryptIntFromModulus( pkRsakey->pmPrimes[0] ), + 
pkRsakey->pmPrimes[0], + pkRsakey->nBitsOfModulus, // Average number of operations + SYMCRYPT_FLAG_MODULUS_PARITY_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbFnScratch, cbFnScratch ); + + SymCryptIntDivMod( SymCryptIntFromModulus( pkRsakey->pmModulus ), + SymCryptDivisorFromModulus( pkRsakey->pmPrimes[0] ), + piExpProd, // n / p0 - use piExpProd as Quotient.nDigits must be >= Src.nDigits + piPrivExp, // n % p0 - use piPrivExp as Remainder.nDigits must be >= Divisor.nDigits + pbFnScratch, cbFnScratch ); + + // Check remainder from dividing n by p0 is 0 + if( !SymCryptIntIsEqualUint32( piPrivExp, 0 ) ) + { + // Should always be true as p0 is GCD(y-1, n) so is definitionally a divisor of n + // Failure here indicates something wrong in our math, or hardware failure + scError = SYMCRYPT_HARDWARE_FAILURE; + goto cleanup; + } + + scError = SymCryptIntCopyMixedSize( piExpProd, SymCryptIntFromModulus( pkRsakey->pmPrimes[1] ) ); + if( scError != SYMCRYPT_NO_ERROR ) + { + // only fails if we computed the wrong bit-size for the primes above + scError = SYMCRYPT_HARDWARE_FAILURE; + goto cleanup; + } + + SymCryptIntToModulus( SymCryptIntFromModulus( pkRsakey->pmPrimes[1] ), + pkRsakey->pmPrimes[1], + pkRsakey->nBitsOfModulus, // Average number of operations + SYMCRYPT_FLAG_MODULUS_PARITY_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbFnScratch, cbFnScratch ); + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeySetValueInternal( + _In_reads_bytes_( cbModulus ) PCBYTE pbModulus, + SIZE_T cbModulus, + _In_reads_( nPubExp ) PCUINT64 pu64PubExp, + UINT32 nPubExp, + _In_reads_bytes_opt_( cbPrivateExponent ) PCBYTE pbPrivateExponent, + SIZE_T cbPrivateExponent, + _In_reads_opt_( nPrimes ) PCBYTE * ppPrimes, + _In_reads_opt_( nPrimes ) SIZE_T * pcbPrimes, + UINT32 nPrimes, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_RSAKEY pkRsakey ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + // 3 digit sizes of temporary integers: + // - 
ndPrimes = max digitsize of prime buffers + // - ndMod = pkRsakey->nDigitsOfModulus + // - ndLarge = ndPrimes + ndMod + + UINT32 cbDivisor = 0; + PSYMCRYPT_DIVISOR pdTmp = NULL; + + UINT32 ndMod = 0; + UINT32 cbMod = 0; + PSYMCRYPT_INT piPhi = NULL; + + UINT32 cbLarge = 0; + PSYMCRYPT_INT piAcc = NULL; + + PBYTE pbScratch = NULL; + UINT32 cbScratch = 0; + PBYTE pbFnScratch = NULL; + UINT32 cbFnScratch = 0; + + // Ensure caller has specified what algorithm(s) the key will be used with + UINT32 algorithmFlags = SYMCRYPT_FLAG_RSAKEY_SIGN | SYMCRYPT_FLAG_RSAKEY_ENCRYPT; + // Ensure only allowed flags are specified + UINT32 allowedFlags = SYMCRYPT_FLAG_KEY_NO_FIPS | SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION | algorithmFlags; + + if ( ( ( flags & ~allowedFlags ) != 0 ) || + ( ( flags & algorithmFlags ) == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Check that minimal validation flag only specified with no fips + if ( ( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) && + ( ( flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION ) != 0 ) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Internal requirement that private key is either specified by primes or by private exponent, not both + // This is not exposed to external API surface - if we were to dynamically check, the SYMCRYPT_ERROR + // should indicate internal logic error - for now just assert + SYMCRYPT_ASSERT( (nPrimes==0) || (pbPrivateExponent==NULL) ); + + // Check if the arguments are correct + if ( (pbModulus==NULL) || (cbModulus==0) || // Modulus is needed + (nPubExp != 1) || (pu64PubExp==NULL) || // Exactly 1 public exponent is needed + ((nPrimes != 2) && (nPrimes != 0)) || + ((nPrimes == 2) && ((ppPrimes==NULL) || (pcbPrimes==NULL) || + (ppPrimes[0]==NULL) || (ppPrimes[1]==NULL) || + (pcbPrimes[0]==0) || (pcbPrimes[1]==0))) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + ndMod = pkRsakey->nDigitsOfModulus; + + // Calculate scratch spaces + // No 
integer overflows as all numbers are limited by ndMod which is checked during Create + if ( (pbPrivateExponent != NULL) || (nPrimes > 0) ) + { + if( pkRsakey->nPrimes != 2 ) + { + // The key was not allocated with space for private key material + // so we cannot set it with private key material + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbMod = SymCryptSizeofIntFromDigits( ndMod ); + cbLarge = SymCryptSizeofIntFromDigits( 2 * ndMod ); // 2*ndMod is still < SymCryptDigitsFromBits(SYMCRYPT_INT_MAX_BITS) + cbDivisor = SymCryptSizeofDivisorFromDigits( ndMod ); + + cbScratch = cbMod + cbLarge + cbDivisor + + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS(ndMod), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_CRT_GENERATION(ndMod), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_EXTENDED_GCD(ndMod), + SYMCRYPT_MAX( SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_DIVISOR(ndMod), + SYMCRYPT_SCRATCH_BYTES_FOR_INT_DIVMOD( ndMod, ndMod ) + )))); + + if( pbPrivateExponent != NULL ) + { + // We use at least as much scratch space when importing by private exponent, but probably more + SYMCRYPT_ASSERT( SymCryptDigitsFromBits( 64 ) == 1 ); + + cbScratch = SYMCRYPT_MAX( cbScratch, + SYMCRYPT_SCRATCH_BYTES_FOR_PRIME_RECOVERY(ndMod, 1, pkRsakey->nSetBitsOfModulus) ); + } + } + else + { + cbScratch = SYMCRYPT_SCRATCH_BYTES_FOR_INT_TO_MODULUS(ndMod); + } + + pbScratch = (PBYTE)SymCryptCallbackAlloc( cbScratch ); + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + // Modulus + scError = SymCryptIntSetValue( pbModulus, cbModulus, numFormat, SymCryptIntFromModulus( pkRsakey->pmModulus ) ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Compute actual modulus size, and check that it isn't bigger than the created size + pkRsakey->nBitsOfModulus = SymCryptIntBitsizeOfValue(SymCryptIntFromModulus(pkRsakey->pmModulus)); + if (pkRsakey->nBitsOfModulus > pkRsakey->nSetBitsOfModulus) + { + scError = 
SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (pkRsakey->nBitsOfModulus < SYMCRYPT_RSAKEY_MIN_BITSIZE_MODULUS) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + + // IntToModulus requirement: + // nBitsOfModulus >= SYMCRYPT_RSAKEY_MIN_BITSIZE_MODULUS --> pmModulus > 0 + SymCryptIntToModulus( + SymCryptIntFromModulus( pkRsakey->pmModulus ), + pkRsakey->pmModulus, + pkRsakey->nBitsOfModulus, + SYMCRYPT_FLAG_DATA_PUBLIC, + pbScratch, + cbScratch ); + + // Public exponents + pkRsakey->nPubExp = nPubExp; + for (UINT32 i = 0; i<pkRsakey->nPubExp; i++) + { + pkRsakey->au64PubExp[i] = pu64PubExp[i]; + } + + // Private key import either by private exponent or primes + if ( (pbPrivateExponent != NULL) || (nPrimes > 0) ) + { + if (pbPrivateExponent != NULL) + { + // Private exponent + scError = SymCryptRsakeyCalculatePrimesFromPrivateExponent( + pkRsakey, + pbPrivateExponent, cbPrivateExponent, + numFormat, + pbScratch, cbScratch ); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + pbFnScratch = pbScratch; + cbFnScratch = cbScratch; + + // Create temporary piPhi + piPhi = SymCryptIntCreate( pbFnScratch, cbMod, ndMod ); pbFnScratch += cbMod; cbFnScratch -= cbMod; + } + else //if (nPrimes > 0) + { + // Primes + pbFnScratch = pbScratch; + cbFnScratch = cbScratch; + + // Create temporary piPhi + piPhi = SymCryptIntCreate( pbFnScratch, cbMod, ndMod ); pbFnScratch += cbMod; cbFnScratch -= cbMod; + + // First fix the tight number of digits of each prime + pkRsakey->nMaxDigitsOfPrimes = 0; + for (UINT32 i=0; i<pkRsakey->nPrimes; i++) + { +#pragma warning(suppress: 26007) // "Incorrect Annotation" - cannot phrase array of pointers to arrays in SAL + scError = SymCryptIntSetValue( ppPrimes[i], pcbPrimes[i], numFormat, piPhi ); + if (scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + pkRsakey->nBitsOfPrimes[i] = SymCryptIntBitsizeOfValue(piPhi); + pkRsakey->nDigitsOfPrimes[i] = SymCryptDigitsFromBits(pkRsakey->nBitsOfPrimes[i]); + + 
pkRsakey->nMaxDigitsOfPrimes = SYMCRYPT_MAX(pkRsakey->nMaxDigitsOfPrimes, pkRsakey->nDigitsOfPrimes[i]); + + if (pkRsakey->nBitsOfPrimes[i] < SYMCRYPT_RSAKEY_MIN_BITSIZE_PRIME) + { + scError = SYMCRYPT_WRONG_KEY_SIZE; + goto cleanup; + } + } + + // Create all the objects + SymCryptRsakeyCreateAllObjects(pkRsakey); + + // Set the values + for (UINT32 i=0; i<pkRsakey->nPrimes; i++) + { +#pragma warning(suppress: 26007) // "Incorrect Annotation" - cannot phrase array of pointers to arrays in SAL + scError = SymCryptIntSetValue( ppPrimes[i], pcbPrimes[i], numFormat, SymCryptIntFromModulus( pkRsakey->pmPrimes[i] ) ); + if (scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Check that this prime is odd (should we check for primality?) + if ((SymCryptIntGetValueLsbits32(SymCryptIntFromModulus( pkRsakey->pmPrimes[i] ))& 1)==0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // IntToModulus requirement: + // nBitsOfPrimes >= SYMCRYPT_RSAKEY_MIN_BITSIZE_PRIME --> pmPrimes[i] > 0 + SymCryptIntToModulus( + SymCryptIntFromModulus( pkRsakey->pmPrimes[i] ), + pkRsakey->pmPrimes[i], + pkRsakey->nBitsOfModulus, // Average number of operations + SYMCRYPT_FLAG_MODULUS_PARITY_PUBLIC | SYMCRYPT_FLAG_MODULUS_PRIME, + pbFnScratch, + cbFnScratch ); + } + } + + // Create remaining temporaries + piAcc = SymCryptIntCreate( pbFnScratch, cbLarge, 2 * ndMod ); pbFnScratch += cbLarge; cbFnScratch -= cbLarge; + pdTmp = SymCryptDivisorCreate( pbFnScratch, cbDivisor, ndMod ); pbFnScratch += cbDivisor; cbFnScratch -= cbDivisor; + + // Calculate the rest of the fields + scError = SymCryptRsakeyCalculatePrivateFields( pkRsakey, pdTmp, piPhi, piAcc, pbFnScratch, cbFnScratch, flags & SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION ); + if (scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // Everything is set here + pkRsakey->hasPrivateKey = TRUE; + } + + pkRsakey->fAlgorithmInfo = flags; // We want to track all of the flags in the Rsakey + + if ( ( flags & 
SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) + { + // Ensure RSA algorithm selftest is run before first use of RSA algorithm + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptRsaSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_RSA); + + // PCT does not need to be run on import - mark it as done + pkRsakey->fAlgorithmInfo |= SYMCRYPT_PCT_RSA_SIGN; + } + +cleanup: + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeySetValue( + _In_reads_bytes_( cbModulus ) PCBYTE pbModulus, + SIZE_T cbModulus, + _In_reads_( nPubExp ) PCUINT64 pu64PubExp, + UINT32 nPubExp, + _In_reads_opt_( nPrimes ) PCBYTE * ppPrimes, + _In_reads_opt_( nPrimes ) SIZE_T * pcbPrimes, + UINT32 nPrimes, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_RSAKEY pkRsakey ) +{ + return SymCryptRsakeySetValueInternal( + pbModulus, cbModulus, + pu64PubExp, nPubExp, + NULL, 0, + ppPrimes, pcbPrimes, nPrimes, + numFormat, + flags, + pkRsakey ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeySetValueFromPrivateExponent( + _In_reads_bytes_( cbModulus ) PCBYTE pbModulus, + SIZE_T cbModulus, + UINT64 u64PubExp, + _In_reads_bytes_( cbPrivateExponent ) PCBYTE pbPrivateExponent, + SIZE_T cbPrivateExponent, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags, + _Inout_ PSYMCRYPT_RSAKEY pkRsakey ) +{ + return SymCryptRsakeySetValueInternal( + pbModulus, cbModulus, + &u64PubExp, 1, + pbPrivateExponent, cbPrivateExponent, + NULL, NULL, 0, + numFormat, + flags, + pkRsakey ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyGetValue( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _Out_writes_bytes_( cbModulus ) PBYTE pbModulus, + SIZE_T cbModulus, + _Out_writes_opt_( nPubExp ) PUINT64 pu64PubExp, + UINT32 nPubExp, + _Out_writes_opt_( nPrimes ) PBYTE * ppPrimes, + _In_reads_opt_( nPrimes ) SIZE_T * pcbPrimes, + UINT32 nPrimes, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = 
SYMCRYPT_NO_ERROR; + + UNREFERENCED_PARAMETER( flags ); + + // Check if private key needed but not there + if ((nPrimes!=0) && (pkRsakey->hasPrivateKey == FALSE)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Modulus + if (pbModulus!=NULL) + { + // We'll get an error if cbModulus is 0 or too small + scError = SymCryptIntGetValue( SymCryptIntFromModulus( pkRsakey->pmModulus ), pbModulus, cbModulus, numFormat ); + if (scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + // Public exponents + if( pu64PubExp != NULL ) + { + if( nPubExp != 1 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + pu64PubExp[0] = pkRsakey->au64PubExp[0]; + } + + // Primes i.e. private key + if( nPrimes != 0 ) + { + if( nPrimes != 2 || ppPrimes == NULL || pcbPrimes == NULL ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + for (UINT32 i=0; i<nPrimes; i++) + { + if (ppPrimes[i]!=NULL) + { + scError = SymCryptIntGetValue( SymCryptIntFromModulus( pkRsakey->pmPrimes[i] ), ppPrimes[i], pcbPrimes[i], numFormat ); + if (scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + } + } + +cleanup: + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyGetCrtValue( + _In_ PCSYMCRYPT_RSAKEY pkRsakey, + _Out_writes_opt_(nCrtExponents) PBYTE * ppCrtExponents, + _In_reads_(nCrtExponents) SIZE_T * pcbCrtExponents, + UINT32 nCrtExponents, + _Out_writes_bytes_opt_(cbCrtCoefficient) PBYTE pbCrtCoefficient, + SIZE_T cbCrtCoefficient, + _Out_writes_bytes_opt_(cbPrivateExponent) PBYTE pbPrivateExponent, + SIZE_T cbPrivateExponent, + SYMCRYPT_NUMBER_FORMAT numFormat, + UINT32 flags) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + + UNREFERENCED_PARAMETER( flags ); + + // Check if the arguments are correct + if ( (ppCrtExponents==NULL) && (nCrtExponents!=0) || + (nCrtExponents != 0 && nCrtExponents != 2 )) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Crt 
value can only be available we have private key. + if (pkRsakey->hasPrivateKey == FALSE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Crt exponents + for (UINT32 i=0; i<nCrtExponents; i++) + { + if (ppCrtExponents[i]!=NULL) + { + scError = SymCryptIntGetValue( pkRsakey->piCrtPrivExps[i], ppCrtExponents[i], pcbCrtExponents[i], numFormat ); + if (scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + } + + if (pbCrtCoefficient!=NULL) + { + cbScratch = SYMCRYPT_SCRATCH_BYTES_FOR_COMMON_MOD_OPERATIONS( pkRsakey->nDigitsOfModulus ); + pbScratch = SymCryptCallbackAlloc( cbScratch ); + + if (pbScratch==NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + scError = SymCryptModElementGetValue( + pkRsakey->pmPrimes[0], + pkRsakey->peCrtInverses[0], + pbCrtCoefficient, + cbCrtCoefficient, + numFormat, + pbScratch, + cbScratch); + if (scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + + if (pbPrivateExponent!=NULL) + { + scError = SymCryptIntGetValue( pkRsakey->piPrivExps[0], pbPrivateExponent, cbPrivateExponent, numFormat ); + if (scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + } + +cleanup: + + if (pbScratch!=NULL) + { + SymCryptWipe(pbScratch,cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsakeyExtendKeyUsage( + _Inout_ PSYMCRYPT_RSAKEY pkRsakey, + UINT32 flags ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + // Ensure caller has specified what algorithm(s) the key will be used with + UINT32 algorithmFlags = SYMCRYPT_FLAG_RSAKEY_SIGN | SYMCRYPT_FLAG_RSAKEY_ENCRYPT; + + if ( ( ( flags & ~algorithmFlags ) != 0 ) || + ( ( flags & algorithmFlags ) == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pkRsakey->fAlgorithmInfo |= flags; + +cleanup: + return scError; +} diff --git a/libs/symcrypt/lib/sc_lib.h b/libs/symcrypt/lib/sc_lib.h new file mode 100644 index 00000000000..faf42ec906a --- /dev/null +++ 
b/libs/symcrypt/lib/sc_lib.h @@ -0,0 +1,5161 @@ +// +// sc_lib.h +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// Internal definitions for the symcrypt library. +// This include file is used only for the files inside the library, not by +// the code that calls the library. +// + +#include <windef.h> +#include <winnt.h> + +#if SYMCRYPT_MS_VC +#define SYMCRYPT_DISABLE_CFG __declspec(guard(nocf)) +#else +#define SYMCRYPT_DISABLE_CFG +#endif + +// +// Global flags +// + +#define SYMCRYPT_FLAG_LIB_INITIALIZED 0x00000001 + +extern UINT32 g_SymCryptFlags; + +//============================================================================================== +// Common environment functions +//============================================================================================== + +VOID +SYMCRYPT_CALL +SymCryptInitEnvCommon( UINT32 version ); + +_Analysis_noreturn_ +VOID +SYMCRYPT_CALL +SymCryptFatalHang( UINT32 fatalcode ); + +#include <symcrypt_low_level.h> + +// Types + +typedef int BOOL; + +#if !defined(TRUE) +#define TRUE (1) +#endif + +#if !defined(FALSE) +#define FALSE (0) +#endif + +#if !defined(UNREFERENCED_PARAMETER) +#define UNREFERENCED_PARAMETER(x) ((void)x) +#endif + +#if !defined(FAST_FAIL_CRYPTO_LIBRARY) +#define FAST_FAIL_CRYPTO_LIBRARY 22 +#endif + +// +// We want to write some of our code to use the native register size provided by the platform we are using to enable +// generic code to compile into reasonable performant versions on 32b and 64b platforms. Below definitions give us +// this flexibility without relying on compiler specifics. +// +// WARNING: Some use of NATIVE_UINT also relies on the little-endianness of the 64b platform; our generic code normally +// uses UINT32, and at the time of writing mixing UINT32 and NATIVE_UINT will not work on a big-endian 64b platform! 
//

//
// NATIVE_INT / NATIVE_UINT are the signed / unsigned integer types of the
// native register width. The 64-bit set is selected when either
// SYMCRYPT_CPU_AMD64 or SYMCRYPT_CPU_ARM64 is non-zero; every other target
// gets the 32-bit set.
//   NATIVE_BITS        width of NATIVE_UINT in bits
//   NATIVE_BYTES       width of NATIVE_UINT in bytes
//   NATIVE_BYTES_LOG2  log2( NATIVE_BYTES )
//
#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64
typedef INT64           NATIVE_INT;
typedef UINT64          NATIVE_UINT;
#define NATIVE_BITS         (64)
#define NATIVE_BYTES        (8)
#define NATIVE_BYTES_LOG2   (3)
#else
typedef INT32           NATIVE_INT;
typedef UINT32          NATIVE_UINT;
#define NATIVE_BITS         (32)
#define NATIVE_BYTES        (4)
#define NATIVE_BYTES_LOG2   (2)
#endif


//
// Our Wipe code uses FORCE_WRITE* which are implemented using
// WriteNoFence* functions. Unfortunately, they declare their parameter
// to be interlocked, and the compiler complains when we also access the variable
// using non-interlocked code.
// This warning is nonsensical in our situation, so we disable it.
// The second warning is about accessing a local variable via an interlocked ptr.
//
// Note: these pragmas are not wrapped in push/pop, so they stay in effect for
// the remainder of every translation unit that includes this header.
//
#pragma prefast( disable:28112 )
#pragma prefast( disable:28113 )
#pragma warning( disable: 4702 )        // unreachable code. The compilers are not equally smart, and some complain
                                        // about 'function must return a value' and some about 'unreachable code'
#pragma warning( disable: 4296 )        // expression is always false - this warning is forced to be an error by a
                                        // pragma in the SDK warning.h, but we don't consider it useful


//
// These macros allow a bunch of generic code to be written.
// For example, the Hash append function is written once generically
// using these macros.
//

//
// Token pasting is done in two steps: CONCAT2/CONCAT3 forward to
// CONCAT_I2/CONCAT_I3 so that macro arguments (ALG, Alg) are fully expanded
// before the ## operator is applied. Pasting directly would glue the literal
// spelling "ALG" / "Alg" instead of its replacement.
//
#define CONCAT_I2( a, b )       a##b
#define CONCAT_I3( a, b, c )    a##b##c


#define CONCAT2( a, b )         CONCAT_I2( a, b )
#define CONCAT3( a, b, c )      CONCAT_I3( a, b, c )
//#define CONCAT4( a, b, c, d)    a##b##c##d


//
// Generic names built from ALG (upper-case spelling, used in type and constant
// names) and Alg (mixed-case spelling, used in function names).
// Neither ALG nor Alg is defined in this part of the header - presumably each
// source file that uses these names defines both before use; verify against
// the including files.
//

#define SYMCRYPT_XXX_STATE          CONCAT3( SYMCRYPT_, ALG, _STATE )
#define PSYMCRYPT_XXX_STATE         CONCAT3( PSYMCRYPT_, ALG, _STATE )
#define PCSYMCRYPT_XXX_STATE        CONCAT3( PCSYMCRYPT_, ALG, _STATE )

#define SYMCRYPT_Xxx                CONCAT2( SymCrypt, Alg )

#define SYMCRYPT_XxxStateCopy       CONCAT3( SymCrypt, Alg, StateCopy )
#define SYMCRYPT_XxxInit            CONCAT3( SymCrypt, Alg, Init )
#define SYMCRYPT_XxxAppend          CONCAT3( SymCrypt, Alg, Append )
#define SYMCRYPT_XxxResult          CONCAT3( SymCrypt, Alg, Result )
#define SYMCRYPT_XxxAppendBlocks    CONCAT3( SymCrypt, Alg, AppendBlocks )
#define SYMCRYPT_XxxStateImport     CONCAT3( SymCrypt, Alg, StateImport)
#define SYMCRYPT_XxxStateExport     CONCAT3( SymCrypt, Alg, StateExport)

// for XOFs and KMAC
#define SYMCRYPT_XXX_EXPANDED_KEY   CONCAT3( SYMCRYPT_, ALG, _EXPANDED_KEY )
#define PSYMCRYPT_XXX_EXPANDED_KEY  CONCAT3( PSYMCRYPT_, ALG, _EXPANDED_KEY )
#define PCSYMCRYPT_XXX_EXPANDED_KEY CONCAT3( PCSYMCRYPT_, ALG, _EXPANDED_KEY )
#define SYMCRYPT_XxxEx              CONCAT3( SymCrypt, Alg, Ex)
#define SYMCRYPT_XxxDefault         CONCAT3( SymCrypt, Alg, Default )
#define SYMCRYPT_XxxExpandKey       CONCAT3( SymCrypt, Alg, ExpandKey )
#define SYMCRYPT_XxxExpandKeyEx     CONCAT3( SymCrypt, Alg, ExpandKeyEx )
#define SYMCRYPT_XxxExtract         CONCAT3( SymCrypt, Alg, Extract )
#define SYMCRYPT_XxxResultEx        CONCAT3( SymCrypt, Alg, ResultEx )
#define SYMCRYPT_XxxKeyCopy         CONCAT3( SymCrypt, Alg, KeyCopy )

// HMAC variants of the generic function names
#define SYMCRYPT_HmacXxx            CONCAT2( SymCryptHmac, Alg )
#define SYMCRYPT_HmacXxxStateCopy   CONCAT3( SymCryptHmac, Alg, StateCopy )
#define SYMCRYPT_HmacXxxKeyCopy     CONCAT3( SymCryptHmac, Alg, KeyCopy )
#define SYMCRYPT_HmacXxxExpandKey   CONCAT3( SymCryptHmac, Alg, ExpandKey )
#define SYMCRYPT_HmacXxxInit        CONCAT3( SymCryptHmac, Alg, Init )
#define SYMCRYPT_HmacXxxAppend      CONCAT3( SymCryptHmac, Alg, Append )
#define SYMCRYPT_HmacXxxResult      CONCAT3( SymCryptHmac, Alg, Result )


#define SYMCRYPT_XXX_INPUT_BLOCK_SIZE   CONCAT3( SYMCRYPT_, ALG, _INPUT_BLOCK_SIZE )
#define SYMCRYPT_XXX_RESULT_SIZE        CONCAT3( SYMCRYPT_, ALG, _RESULT_SIZE )

// The HMAC block and result sizes are simply those of the underlying hash.
#define SYMCRYPT_HMAC_XXX_INPUT_BLOCK_SIZE      SYMCRYPT_XXX_INPUT_BLOCK_SIZE
#define SYMCRYPT_HMAC_XXX_RESULT_SIZE           SYMCRYPT_XXX_RESULT_SIZE

#define PSYMCRYPT_HMAC_XXX_EXPANDED_KEY     CONCAT3( PSYMCRYPT_HMAC_, ALG, _EXPANDED_KEY )
#define PCSYMCRYPT_HMAC_XXX_EXPANDED_KEY    CONCAT3( PCSYMCRYPT_HMAC_, ALG, _EXPANDED_KEY )
#define SYMCRYPT_HMAC_XXX_STATE             CONCAT3( SYMCRYPT_HMAC_, ALG, _STATE )
#define PSYMCRYPT_HMAC_XXX_STATE            CONCAT3( PSYMCRYPT_HMAC_, ALG, _STATE )
#define PCSYMCRYPT_HMAC_XXX_STATE           CONCAT3( PCSYMCRYPT_HMAC_, ALG, _STATE )


//==============================================================================================
//  PLATFORM SPECIFICS
//==============================================================================================

// Everything from here to the matching #endif is compiled only for x86 / AMD64 builds.
#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64

//
// The XMM save/restore functions need to be passed a buffer in which they can store their data.
// We have two different places where we use this, in kernel mode and in user mode (while testing).
// We can't declare a union of the two structs as we can't include the kernel-mode headers in this file
// when compiled for a user-mode app.
// Instead we define a structure with reserved space, and have each environment check the size and
// cast the pointer.
//
// We always use the KeSaveExtendedProcessorState call, and not the KeSaveFloatingPointState as it
// allows us to save only the XMM registers and not touch the X87/MMX registers which should
// save time.
//

//
// SYMCRYPT_XSTATE_SAVE_SIZE is the number of bytes reserved inside
// SYMCRYPT_EXTENDED_SAVE_DATA for the platform's XSTATE_SAVE structure.
//
#if SYMCRYPT_CPU_X86

//
// The XSTATE_SAVE structure consists of a union between
// struct:
//      - INT64                 8
//      - INT32                 4
//      - Pointer               4
//      - Pointer               4
//      - Pointer               4
//      - Pointer               4
//      - BYTE                  1 + 3 padding
//                              32 total
// - XSTATE_CONTEXT
//      - UINT64                8
//      - UINT32                4
//      - UINT32                4
//      - Pointer + UINT32      8
//      - Pointer + UINT32      8
//                              32 total
//
// Experimentally: need 4 more bytes, don't know why yet.
// Should have a look with the debugger when I have time.
//
// NOTE(review): the value below is 32, i.e. it does not include the
// "4 more bytes" mentioned above - confirm which of the two is current.
//

#define SYMCRYPT_XSTATE_SAVE_SIZE    (32)

#elif SYMCRYPT_CPU_AMD64

//
// The XSTATE_SAVE structure consists of
// - pointer                8
// - pointer                8
// - BYTE                   1 + 7 padding
// - XSTATE_CONTEXT
//      - UINT64            8
//      - UINT32            4
//      - UINT32            4
//      - Pointer           8
//      - Pointer           8
//                          56 total
//
#define SYMCRYPT_XSTATE_SAVE_SIZE    (56)

#endif

//
// Opaque save area handed to the Save*mm / Restore*mm functions.
// 'data' reserves SYMCRYPT_XSTATE_SAVE_SIZE bytes; SYMCRYPT_MAGIC_FIELD is
// defined elsewhere.
//
typedef
SYMCRYPT_ALIGN
struct _SYMCRYPT_EXTENDED_SAVE_DATA {
    SYMCRYPT_ALIGN  BYTE    data[SYMCRYPT_XSTATE_SAVE_SIZE];
    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_EXTENDED_SAVE_DATA, *PSYMCRYPT_EXTENDED_SAVE_DATA;


//
// Two functions to save/restore the XMM registers.
// These must ALWAYS be called in pairs, even if the SaveXmm function returned an error.
// XMM registers cannot be used if the save function returned an error.
// If the SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL feature is present, then the
// SymCryptSaveXmm function will never return an error.
//
// NOTE(review): the "ALWAYS in pairs, even on error" rule above and the
// "Restore only after a successful Save" rule below disagree - confirm which
// contract the implementations actually follow.
//

//
// Functions to save/restore the XMM or YMM registers.
// If the Save*mm function is called and succeeds, then the corresponding
// Restore*mm function MUST be called later on the same thread.
// The extended registers cannot be used if the Save function returns an error.
+// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSaveXmm( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ); + +VOID +SYMCRYPT_CALL +SymCryptRestoreXmm( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ); + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSaveYmm( _Out_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ); + +VOID +SYMCRYPT_CALL +SymCryptRestoreYmm( _Inout_ PSYMCRYPT_EXTENDED_SAVE_DATA pSaveData ); +#endif + + +//============================================================================================== +// Library declarations +//============================================================================================== + +// +// Function to check that the library has been initialized +// +#if SYMCRYPT_DEBUG + +VOID +SYMCRYPT_CALL +SymCryptLibraryWasNotInitialized(void); + +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptCheckLibraryInitialized(void) +{ + if( !(g_SymCryptFlags & SYMCRYPT_FLAG_LIB_INITIALIZED) ) + { + SymCryptLibraryWasNotInitialized(); + } +} +#else +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptCheckLibraryInitialized(void) +{ +} +#endif + +#define HMAC_IPAD_BYTE 0x36 +#define HMAC_OPAD_BYTE 0x5c + +// SYMCRYPT_CPU_FEATURES +#define SYMCRYPT_CPU_FEATURES_FOR_PCLMULQDQ_CODE (SYMCRYPT_CPU_FEATURE_PCLMULQDQ | SYMCRYPT_CPU_FEATURE_SSSE3 | SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL ) + +#define SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE (SYMCRYPT_CPU_FEATURE_SSSE3 | SYMCRYPT_CPU_FEATURE_AESNI) +#define SYMCRYPT_CPU_FEATURES_FOR_AESNI_PCLMULQDQ_CODE (SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE | SYMCRYPT_CPU_FEATURES_FOR_PCLMULQDQ_CODE) +#define SYMCRYPT_CPU_FEATURES_FOR_VAES_256_CODE (SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE | SYMCRYPT_CPU_FEATURE_AVX2 | SYMCRYPT_CPU_FEATURE_VAES) +#define SYMCRYPT_CPU_FEATURES_FOR_VAES_512_CODE (SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE | SYMCRYPT_CPU_FEATURE_AVX512 | SYMCRYPT_CPU_FEATURE_VAES) + +#define SYMCRYPT_CPU_FEATURES_FOR_SHANI_CODE (SYMCRYPT_CPU_FEATURE_SSSE3 | SYMCRYPT_CPU_FEATURE_SHANI) + +#define SYMCRYPT_CPU_FEATURES_FOR_MULX 
(SYMCRYPT_CPU_FEATURE_BMI2 | SYMCRYPT_CPU_FEATURE_ADX | SYMCRYPT_CPU_FEATURE_SSE2 ) + +// +// ROTATE OPERATIONS +// +// +// If this lib is ever ported to a platform that doesn't have the _rotx functions +// the macros can be replaced by portable definitions just like the ROL16/ROR16 +// + +#define ROL16( x, n ) ((UINT16)( ( ((x) << (n)) | ((x) >> (16-(n))) ) )) +#define ROR16( x, n ) ((UINT16)( ( ((x) >> (n)) | ((x) << (16-(n))) ) )) + +#if SYMCRYPT_MS_VC + #define ROL32( x, n ) _rotl( (x), (n) ) + #define ROR32( x, n ) _rotr( (x), (n) ) + #define ROL64( x, n ) _rotl64( (x), (n) ) + #define ROR64( x, n ) _rotr64( (x), (n) ) +#elif SYMCRYPT_GNUC + #define ROL32( x, n ) ((UINT32)( ( ((x) << (n)) | ((x) >> (32-(n))) ) )) + #define ROR32( x, n ) ((UINT32)( ( ((x) >> (n)) | ((x) << (32-(n))) ) )) + #define ROL64( x, n ) ((UINT64)( ( ((x) << (n)) | ((x) >> (64-(n))) ) )) + #define ROR64( x, n ) ((UINT64)( ( ((x) >> (n)) | ((x) << (64-(n))) ) )) +#else + #error Unknown compiler +#endif + + +#define SYMCRYPT_ARRAY_SIZE(_x) (sizeof(_x)/sizeof(_x[0])) + +enum{ + STATE_NEXT = 0, // starting state = 0, set by structure wipe. + STATE_DATA_START, + STATE_DATA_END, + STATE_RESULT2, // 2nd phase of result computation (1st phase is at STATE_NEXT when the result operation is found) + STATE_RESULT_DONE, // 3rd phase of result computation +}; + + + +//========================================================================== +// Inline implementations ... +//========================================================================== + +// +// These are a bunch of functions to convert between an array of +// 32 or 64-bit integers to an array of bytes in LSBfirst or MSBfirst convention. +// Not all variations have been implemented yet. We add them as they are +// needed. +// + +// +// These implementations are optimized for inlining, especially when the +// size of the data to be converted is a compile-time constant. +// + +// +// SymCryptUint32ToMsbFirst & SymCryptMsbFirstToUint32. 
+// This is used by the SHA family +// +#if SYMCRYPT_CPU_AMD64 + +// +// On AMD64 we can do 2 UINT32s at once by doing a ROL(x,32) and a BSWAP. +// The 64-bit load places puData[0] in the low half of the word (little-endian); rotating +// by 32 moves it to the high half, so the MSB-first 64-bit store emits puData[0] before +// puData[1]. cuData counts UINT32 elements; an odd trailing element is stored on its own. +// NOTE(review): the load goes through a (UINT64*) cast of a const UINT32 pointer - +// presumably callers pass suitably aligned buffers; confirm before reusing elsewhere. +// +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptUint32ToMsbFirst( _In_reads_(cuData) PCUINT32 puData, + _Out_writes_(4*cuData) PBYTE pbResult, + SIZE_T cuData ) +{ + while( cuData >= 2 ) + { + SYMCRYPT_STORE_MSBFIRST64( pbResult, ROL64( *(UINT64*)puData, 32 )); + pbResult += 8; + puData += 2; + cuData -= 2; + } + + if( cuData != 0 ) + { + SYMCRYPT_STORE_MSBFIRST32( pbResult, *puData ); + } +} + +#else // not _AMD64_ + +// Portable version: stores one UINT32 per iteration. +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptUint32ToMsbFirst( _In_reads_(cuData) PCUINT32 puData, + _Out_writes_(4*cuData) PBYTE pbResult, + SIZE_T cuData ) +{ + while( cuData != 0 ) + { + SYMCRYPT_STORE_MSBFIRST32( pbResult, *puData ); + puData++; + pbResult += 4; + cuData--; + } +} +#endif // platform switch for SymCryptUint32ToMsbFirst + +// Inverse conversion: loads cuResult MSB-first 4-byte values, one UINT32 per iteration. +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptMsbFirstToUint32( _In_reads_(4*cuResult) PCBYTE pbData, + _Out_writes_(cuResult) PUINT32 puResult, + SIZE_T cuResult ) +{ + while( cuResult != 0 ) + { + *puResult = SYMCRYPT_LOAD_MSBFIRST32( pbData ); + puResult++; + pbData += 4; + cuResult--; + } +} + + +// +// SymCryptUint32ToLsbFirst & SymCryptLsbFirstToUint32 +// These are used by the MD4 and MD5 hash functions +// +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + +// +// On AMD64, X86, and ARM this is just a memcpy +// +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptUint32ToLsbFirst( _In_reads_(cuData) PCUINT32 puData, + _Out_writes_(4*cuData) PBYTE pbResult, + SIZE_T cuData ) + +{ + memcpy( pbResult, puData, 4*cuData ); +} + +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptLsbFirstToUint32( _In_reads_(4*cuResult) PCBYTE pbData, + _Out_writes_(cuResult) PUINT32 puResult, + SIZE_T cuResult ) +{ + memcpy( puResult, pbData, 4*cuResult ); +} + +#else // not (AMD64_ or X86_ or ARM or ARM64) + +// Fallback for targets not covered by the memcpy path: convert element by element. +static +FORCEINLINE +VOID
+SYMCRYPT_CALL +SymCryptUint32ToLsbFirst( _In_reads_(cuData) PCUINT32 puData, + _Out_writes_(4*cuData) PBYTE pbResult, + SIZE_T cuData ) +{ + while( cuData != 0 ) + { + SYMCRYPT_STORE_LSBFIRST32( pbResult, *puData ); + puData++; + pbResult += 4; + cuData--; + } +} + +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptLsbFirstToUint32( _In_reads_(4*cuResult) PCBYTE pbData, + _Out_writes_(cuResult) PUINT32 puResult, + SIZE_T cuResult ) +{ + while( cuResult != 0 ) + { + *puResult = SYMCRYPT_LOAD_LSBFIRST32( pbData ); + pbData += 4; + puResult++; + cuResult--; + } +} + +#endif // Platform switch for SymCryptUint32ToLsbFirst + + +// +// SymCryptUint64ToLsbFirst & SymCryptLsbFirstToUint64 +// These are used by Keccak. +// +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 + +// +// On AMD64, X86, and ARM this is just a memcpy +// +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptUint64ToLsbFirst( _In_reads_(cuData) PCUINT64 puData, + _Out_writes_(8*cuData) PBYTE pbResult, + SIZE_T cuData ) + +{ + memcpy( pbResult, puData, 8*cuData ); +} + +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptLsbFirstToUint64( _In_reads_(8*cuResult) PCBYTE pbData, + _Out_writes_(cuResult) PUINT64 puResult, + SIZE_T cuResult ) +{ + memcpy( puResult, pbData, 8*cuResult ); +} + +#else // not (AMD64_ or X86_ or ARM or ARM64) + +// Fallback for targets not covered by the memcpy path: convert element by element. +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptUint64ToLsbFirst( _In_reads_(cuData) PCUINT64 puData, + _Out_writes_(8*cuData) PBYTE pbResult, + SIZE_T cuData ) +{ + while( cuData != 0 ) + { + SYMCRYPT_STORE_LSBFIRST64( pbResult, *puData ); + puData++; + pbResult += 8; + cuData--; + } +} + +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptLsbFirstToUint64( _In_reads_(8*cuResult) PCBYTE pbData, + _Out_writes_(cuResult) PUINT64 puResult, + SIZE_T cuResult ) +{ + while( cuResult != 0 ) + { + *puResult = SYMCRYPT_LOAD_LSBFIRST64( pbData ); + pbData += 8; + puResult++; + cuResult--; + } +} + +#endif // Platform switch for
SymCryptUint64ToLsbFirst & SymCryptLsbFirstToUint64 + + +// +// SymCryptUint64ToMsbFirst & SymCryptMsbFirstToUint64 +// +// Both convert one 64-bit word per iteration; cuData / cuResult count UINT64 elements, +// not bytes. +// +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptUint64ToMsbFirst( _In_reads_(cuData) PCUINT64 puData, + _Out_writes_(8*cuData) PBYTE pbResult, + SIZE_T cuData ) +{ + while( cuData != 0 ) + { + SYMCRYPT_STORE_MSBFIRST64( pbResult, *puData ); + pbResult += 8; + puData ++; + cuData --; + } +} + +static +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptMsbFirstToUint64( _In_reads_(8*cuResult) PCBYTE pbData, + _Out_writes_(cuResult) PUINT64 puResult, + SIZE_T cuResult ) +{ + while( cuResult != 0 ) + { + *puResult = SYMCRYPT_LOAD_MSBFIRST64( pbData ); + puResult++; + pbData += 8; + cuResult--; + } +} + +//////////////////////////////////////////////////////////////////////////////////// +// Internal function prototypes +// + +// +// SymCryptSha1AppendBlocks +// +// Updates the chaining state of the hash function with one or more blocks of data. +// Each block is 64 bytes long, the natural size of a SHA-1 input block. +// +// cbData must be a multiple of 64. +// +VOID +SYMCRYPT_CALL +SymCryptSha1AppendBlocks( + _Inout_ SYMCRYPT_SHA1_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// +// SymCryptSha256AppendBlocks +// +// Updates the chaining state of the hash function with one or more blocks of data. +// Each block is 64 bytes long, the natural size of a SHA256 input block. +// +// cbData must be a multiple of 64.
+// +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// Intrinsics implementation processing 4 message blocks in parallel using XMM registers +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_xmm_4blocks( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// Assembly implementation processing 4 message blocks in parallel using XMM registers +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_xmm_ssse3_asm( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// Intrinsics implementation processing 8 message blocks in parallel using YMM registers +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_ymm_8blocks( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// Assembly implementation processing 8 message blocks in parallel using YMM registers +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_ymm_avx2_asm( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + + +// +// SymCryptSha512AppendBlocks +// +// Updates the chaining state of the hash function with one or more blocks of data. +// Each block is 128 bytes long, the natural size of a SHA512 input block. +// +// cbData must be a multiple of 128. 
+// +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + + +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_xmm( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// Intrinsics implementation using YMM registers +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_ymm_1block( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// Intrinsics implementation processing 2 message blocks in parallel using YMM registers +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_ymm_2blocks( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// Intrinsics implementation processing 4 message blocks in parallel using YMM registers +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_ymm_4blocks( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// Assembly implementation processing 4 message blocks in parallel using YMM registers +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_ymm_avx2_asm( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + +// Assembly implementation processing 4 message blocks in parallel using YMM registers with AVX512 instruction set +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_ymm_avx512vl_asm( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + + + + +// +// SymCryptMd5AppendBlocks +// +// Updates the chaining state of the hash function with one or more blocks of data. 
+// Each block is 64 bytes long, the natural size of an MD5 input block. +// +// cbData must be a multiple of 64. +// +VOID +SYMCRYPT_CALL +SymCryptMd5AppendBlocks( + _Inout_ SYMCRYPT_MD5_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + + +// +// SymCryptMd4AppendBlocks +// +// Updates the chaining state of the hash function with one or more blocks of data. +// Each block is 64 bytes long, the natural size of an MD4 input block. +// +// cbData must be a multiple of 64. +// +VOID +SYMCRYPT_CALL +SymCryptMd4AppendBlocks( + _Inout_ SYMCRYPT_MD4_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + + +// +// SymCryptMd2AppendBlocks +// +// Update the C and X state based on the message block in the buffer. +// +VOID +SYMCRYPT_CALL +SymCryptMd2AppendBlocks( + _Inout_ SYMCRYPT_MD2_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ); + + +// +// SymCryptUint32ToMsbFirst +// +// Convert an array of UINT32s to 4-byte values stored MSB first (big-endian). +// Note that the count is the number of UINT32s to convert, not the number +// of bytes. This is somewhat unusual, but it avoids any confusion about +// converting an odd number of bytes. +// +VOID +SYMCRYPT_CALL +SymCryptUint32ToMsbFirst( _In_reads_(cuData) PCUINT32 puData, + _Out_writes_(4*cuData) PBYTE pbResult, + SIZE_T cuData ); + +// +// SymCryptUint32ToLsbFirst +// +// Convert an array of UINT32s to 4-byte values stored LSB first (little-endian). +// Note that the count is the number of UINT32s to convert, not the number +// of bytes. This is somewhat unusual, but it avoids any confusion about +// converting an odd number of bytes.
+// +VOID +SYMCRYPT_CALL +SymCryptUint32ToLsbFirst( _In_reads_(cuData) PCUINT32 puData, + _Out_writes_(4*cuData) PBYTE pbResult, + SIZE_T cuData ); + +// +// SymCryptMsbFirstToUint32 +// +// Convert an array of 4-byte values stored MSB first (big-endian) to an array +// of UINT32s. +// Note that the count is the number of UINT32s to convert, not the number +// of bytes. This is somewhat unusual, but it avoids any confusion about +// converting an odd number of bytes. +// +VOID +SYMCRYPT_CALL +SymCryptMsbFirstToUint32( _In_reads_(4*cuResult) PCBYTE pbData, + _Out_writes_(cuResult) PUINT32 puResult, + SIZE_T cuResult ); + +// +// SymCryptLsbFirstToUint32 +// +// Convert an array of 4-byte values stored LSB first (little-endian) to an array +// of UINT32s. +// Note that the count is the number of UINT32s to convert, not the number +// of bytes. This is somewhat unusual, but it avoids any confusion about +// converting an odd number of bytes. +// +VOID +SYMCRYPT_CALL +SymCryptLsbFirstToUint32( _In_reads_(4*cuResult) PCBYTE pbData, + _Out_writes_(cuResult) PUINT32 puResult, + SIZE_T cuResult ); + +// +// SymCryptUint64ToMsbFirst +// +// Convert an array of UINT64s to an array of bytes using the MSB first +// (big-endian) convention. +// +VOID +SYMCRYPT_CALL +SymCryptUint64ToMsbFirst( _In_reads_(cuData) PCUINT64 puData, + _Out_writes_(8*cuData) PBYTE pbResult, + SIZE_T cuData ); + +// +// SymCryptMsbFirstToUint64 +// +// Convert an array of 8-byte values stored MSB first (big-endian) to an array +// of UINT64s. +// Note that the count is the number of UINT64s to convert, not the number +// of bytes. This is somewhat unusual, but it avoids any confusion about +// converting an odd number of bytes.
+// +VOID +SYMCRYPT_CALL +SymCryptMsbFirstToUint64( _In_reads_(8*cuResult) PCBYTE pbData, + _Out_writes_(cuResult) PUINT64 puResult, + SIZE_T cuResult ); + + + +//============================================================================ +// HMAC macros and inline functions. +// +// REPEAT_BYTE_TO_UINT32 / REPEAT_BYTE_TO_UINT64 replicate a byte value into every byte +// position of a 32-bit / 64-bit word. The argument is parenthesized and cast at each use +// so that an expression argument (e.g. a ^ b) expands with the intended precedence. +// The argument is evaluated more than once; do not pass expressions with side effects. +// +#define REPEAT_BYTE_TO_UINT32( x ) ( ((UINT32)(x) << 24) | ((UINT32)(x) << 16) | ((UINT32)(x) << 8) | (UINT32)(x) ) +#define REPEAT_BYTE_TO_UINT64( x ) ( ((UINT64)REPEAT_BYTE_TO_UINT32(x) << 32) | REPEAT_BYTE_TO_UINT32(x) ) + +// +// The XorByteIntoBuffer function is a platform-optimized function to xor a byte +// repeatedly into a buffer. +// cqBuf is the buffer length in 8-byte units (the buffer holds 8*cqBuf bytes), so the +// length in bytes is always a multiple of 8. +// NOTE(review): the first variant accesses pbBuf through a UINT64 pointer - presumably +// callers pass a suitably aligned buffer; confirm. +// +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM | SYMCRYPT_CPU_ARM64 +static +FORCEINLINE +VOID +SYMCRYPT_CALL +XorByteIntoBuffer( _Inout_updates_( 8*cqBuf ) PBYTE pbBuf, SIZE_T cqBuf, BYTE v ) +{ + SIZE_T i; + const UINT64 v64 = REPEAT_BYTE_TO_UINT64( v ); + + for( i=0; i<cqBuf; i++ ) + { + ((UINT64 *)pbBuf)[i] ^= v64; + } +} +#else +static +FORCEINLINE +VOID +SYMCRYPT_CALL +XorByteIntoBuffer( _Inout_updates_( 8*cqBuf ) PBYTE pbBuf, SIZE_T cqBuf, BYTE v ) +{ + SIZE_T i; + + for( i=0; i<8*cqBuf; i++ ) + { + pbBuf[i] ^= v; + } +} +#endif + +// +// GHASH +// + +VOID +SYMCRYPT_CALL +SymCryptGHashExpandKey( + _Out_ PSYMCRYPT_GHASH_EXPANDED_KEY expandedKey, + _In_reads_( SYMCRYPT_GF128_BLOCK_SIZE ) PCBYTE pH ); + +VOID +SYMCRYPT_CALL +SymCryptGHashExpandKeyC( + _Out_writes_( SYMCRYPT_GF128_FIELD_SIZE ) PSYMCRYPT_GF128_ELEMENT expandedKey, + _In_reads_( SYMCRYPT_GF128_BLOCK_SIZE ) PCBYTE pH ); + +VOID +SYMCRYPT_CALL +SymCryptGHashExpandKeyX86( + _Out_ PSYMCRYPT_GHASH_EXPANDED_KEY expandedKey, + _In_reads_( SYMCRYPT_GF128_BLOCK_SIZE ) PCBYTE pH ); + +VOID +SYMCRYPT_CALL +SymCryptGHashExpandKeyAmd64( + _Out_writes_( SYMCRYPT_GF128_FIELD_SIZE ) PSYMCRYPT_GF128_ELEMENT expandedKey, + _In_reads_( SYMCRYPT_GF128_BLOCK_SIZE ) PCBYTE pH ); + +// +// For all GHashAppendData functions, data will be appended in
multiples of SYMCRYPT_GF128_BLOCK_SIZE. +// If the data is not a multiple of SYMCRYPT_GF128_BLOCK_SIZE, any remaining data will be ignored. +// + +VOID +SYMCRYPT_CALL +SymCryptGHashAppendData( + _In_ PCSYMCRYPT_GHASH_EXPANDED_KEY expandedKey, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptGHashAppendDataC( + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptGHashAppendDataXmm( + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptGHashAppendDataNeon( + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptGHashAppendDataPclmulqdq( + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptGHashResult( + _In_ PCSYMCRYPT_GF128_ELEMENT pState, + _Out_writes_( SYMCRYPT_GF128_BLOCK_SIZE ) PBYTE pbResult ); + + +VOID +SYMCRYPT_CALL +SymCryptMarvin32AppendBlocks( + _Inout_ PSYMCRYPT_MARVIN32_CHAINING_STATE pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + + + + +extern const BYTE SymCryptTestMsg3[3]; +extern const BYTE SymCryptTestMsg16[16]; +extern const BYTE SymCryptTestKey32[32]; + +VOID +SYMCRYPT_CALL +SymCryptInjectError( PBYTE pbData, SIZE_T cbData ); + + +#define SYMCRYPT_CPUID_DETECT_FLAG_CHECK_OS_SUPPORT_FOR_YMM 1 // enable checking of OSXSAVE bit & XGETBV logic + +VOID +SYMCRYPT_CALL +SymCryptDetectCpuFeaturesByCpuid( UINT32 flags 
); + +VOID +SYMCRYPT_CALL +SymCryptDetectCpuFeaturesFromRegisters(void); + +VOID +SYMCRYPT_CALL +SymCryptDetectCpuFeaturesFromRegistersNoTry(void); + +VOID +SYMCRYPT_CALL +SymCryptDetectCpuFeaturesFromIsProcessorFeaturePresent(void); + +VOID +SYMCRYPT_CALL +SymCryptCpuidExFunc( int cpuInfo[4], int function_id, int subfunction_id ); + +//////////////////////////////////////////////////////////////////////////// +// Export blob formats +//////////////////////////////////////////////////////////////////////// + +//========================================================== +// BLOBS +// +// SYMCRYPT_BLOB_HEADER +// Generic header for all exported blobs from SymCrypt +// + +typedef enum _SYMCRYPT_BLOB_TYPE { + SymCryptBlobTypeUnknown = 0, + SymCryptBlobTypeHashState = 0x100, + SymCryptBlobTypeMd2State = SymCryptBlobTypeHashState + 1, // explicit constants as these have to remain the same forever. + SymCryptBlobTypeMd4State = SymCryptBlobTypeHashState + 2, + SymCryptBlobTypeMd5State = SymCryptBlobTypeHashState + 3, + SymCryptBlobTypeSha1State = SymCryptBlobTypeHashState + 4, + SymCryptBlobTypeSha256State = SymCryptBlobTypeHashState + 5, + SymCryptBlobTypeSha384State = SymCryptBlobTypeHashState + 6, + SymCryptBlobTypeSha512State = SymCryptBlobTypeHashState + 7, + SymCryptBlobTypeSha3_256State = SymCryptBlobTypeHashState + 8, + SymCryptBlobTypeSha3_384State = SymCryptBlobTypeHashState + 9, + SymCryptBlobTypeSha3_512State = SymCryptBlobTypeHashState + 10, + SymCryptBlobTypeSha224State = SymCryptBlobTypeHashState + 11, + SymCryptBlobTypeSha512_224State = SymCryptBlobTypeHashState + 12, + SymCryptBlobTypeSha512_256State = SymCryptBlobTypeHashState + 13, + SymCryptBlobTypeSha3_224State = SymCryptBlobTypeHashState + 14, +} SYMCRYPT_BLOB_TYPE; + +#define SYMCRYPT_BLOB_MAGIC ('cmys') + +// +// We define all export structures with pack=1 so that there are no padding bytes. 
+// +#pragma pack(push, 1) + +typedef struct _SYMCRYPT_BLOB_HEADER { + UINT32 magic; // 'cmys' + UINT32 size; // total size of blob + UINT32 type; // SYMCRYPT_BLOB_TYPE: type of blob +} SYMCRYPT_BLOB_HEADER, *PSYMCRYPT_BLOB_HEADER; + +typedef struct _SYMCRYPT_BLOB_TRAILER { + BYTE checksum[8]; // contains the Marvin32 checksum of the rest of the blob +} SYMCRYPT_BLOB_TRAILER, *PSYMCRYPT_BLOB_TRAILER; + +typedef struct _SYMCRYPT_MD2_STATE_EXPORT_BLOB { + SYMCRYPT_BLOB_HEADER header; + BYTE C[16]; + BYTE X[16]; + UINT32 bytesInBuffer; + BYTE buffer[16]; + BYTE rfu[8]; // rfu = Reserved for Future Use. + SYMCRYPT_BLOB_TRAILER trailer; +} SYMCRYPT_MD2_STATE_EXPORT_BLOB; + +C_ASSERT( sizeof( SYMCRYPT_MD2_STATE_EXPORT_BLOB ) == SYMCRYPT_MD2_STATE_EXPORT_SIZE ); + + +typedef struct _SYMCRYPT_MD4_STATE_EXPORT_BLOB { + SYMCRYPT_BLOB_HEADER header; + BYTE chain[16]; // In the same format used for the final hash value of MD4 + UINT64 dataLength; + BYTE buffer[64]; + BYTE rfu[8]; // rfu = Reserved for Future Use. + SYMCRYPT_BLOB_TRAILER trailer; +} SYMCRYPT_MD4_STATE_EXPORT_BLOB; + +C_ASSERT( sizeof( SYMCRYPT_MD4_STATE_EXPORT_BLOB ) == SYMCRYPT_MD4_STATE_EXPORT_SIZE ); + + +typedef struct _SYMCRYPT_MD5_STATE_EXPORT_BLOB { + SYMCRYPT_BLOB_HEADER header; + BYTE chain[16]; // In the same format used for the final hash value of MD5 + UINT64 dataLength; + BYTE buffer[64]; + BYTE rfu[8]; // rfu = Reserved for Future Use. + SYMCRYPT_BLOB_TRAILER trailer; +} SYMCRYPT_MD5_STATE_EXPORT_BLOB; + +C_ASSERT( sizeof( SYMCRYPT_MD5_STATE_EXPORT_BLOB ) == SYMCRYPT_MD5_STATE_EXPORT_SIZE ); + + +typedef struct _SYMCRYPT_SHA1_STATE_EXPORT_BLOB { + SYMCRYPT_BLOB_HEADER header; + BYTE chain[20]; // in the same format used for the final hash value of SHA-1 + UINT64 dataLength; + BYTE buffer[64]; + BYTE rfu[8]; // rfu = Reserved for Future Use. 
+ SYMCRYPT_BLOB_TRAILER trailer; +} SYMCRYPT_SHA1_STATE_EXPORT_BLOB; + +C_ASSERT( sizeof( SYMCRYPT_SHA1_STATE_EXPORT_BLOB ) == SYMCRYPT_SHA1_STATE_EXPORT_SIZE ); + + +typedef struct _SYMCRYPT_SHA256_STATE_EXPORT_BLOB { + SYMCRYPT_BLOB_HEADER header; + BYTE chain[32]; // in the same format used for the final hash value of SHA-256 + UINT64 dataLength; + BYTE buffer[64]; + BYTE rfu[8]; // rfu = Reserved for Future Use. + SYMCRYPT_BLOB_TRAILER trailer; +} SYMCRYPT_SHA256_STATE_EXPORT_BLOB; + +C_ASSERT( sizeof( SYMCRYPT_SHA256_STATE_EXPORT_BLOB ) == SYMCRYPT_SHA256_STATE_EXPORT_SIZE ); + + +typedef struct _SYMCRYPT_SHA512_STATE_EXPORT_BLOB { + SYMCRYPT_BLOB_HEADER header; + BYTE chain[64]; // in the same format used for the final hash value of SHA-512 + UINT64 dataLengthL; // low 64 bits of data length + UINT64 dataLengthH; // high 64 bits of data length + BYTE buffer[128]; + BYTE rfu[8]; // rfu = Reserved for Future Use. + SYMCRYPT_BLOB_TRAILER trailer; +} SYMCRYPT_SHA512_STATE_EXPORT_BLOB; + +C_ASSERT( sizeof( SYMCRYPT_SHA512_STATE_EXPORT_BLOB ) == SYMCRYPT_SHA512_STATE_EXPORT_SIZE ); + +// Refer to SYMCRYPT_KECCAK_STATE documentation for the explanation of each struct member +typedef struct _SYMCRYPT_KECCAK_STATE_EXPORT_BLOB { + SYMCRYPT_BLOB_HEADER header; + BYTE state[200]; + UINT32 stateIndex; + UINT8 paddingValue; + BOOLEAN squeezeMode; + BYTE rfu[8]; // rfu = Reserved for Future Use. 
+ SYMCRYPT_BLOB_TRAILER trailer; +} SYMCRYPT_KECCAK_STATE_EXPORT_BLOB; + +typedef SYMCRYPT_KECCAK_STATE_EXPORT_BLOB SYMCRYPT_SHA3_224_STATE_EXPORT_BLOB; +typedef SYMCRYPT_KECCAK_STATE_EXPORT_BLOB SYMCRYPT_SHA3_256_STATE_EXPORT_BLOB; +typedef SYMCRYPT_KECCAK_STATE_EXPORT_BLOB SYMCRYPT_SHA3_384_STATE_EXPORT_BLOB; +typedef SYMCRYPT_KECCAK_STATE_EXPORT_BLOB SYMCRYPT_SHA3_512_STATE_EXPORT_BLOB; + +C_ASSERT(sizeof(SYMCRYPT_SHA3_224_STATE_EXPORT_BLOB) == SYMCRYPT_SHA3_224_STATE_EXPORT_SIZE); +C_ASSERT(sizeof(SYMCRYPT_SHA3_256_STATE_EXPORT_BLOB) == SYMCRYPT_SHA3_256_STATE_EXPORT_SIZE); +C_ASSERT(sizeof(SYMCRYPT_SHA3_384_STATE_EXPORT_BLOB) == SYMCRYPT_SHA3_384_STATE_EXPORT_SIZE); +C_ASSERT(sizeof(SYMCRYPT_SHA3_512_STATE_EXPORT_BLOB) == SYMCRYPT_SHA3_512_STATE_EXPORT_SIZE); + +#pragma pack(pop) + +///////////////////////////////////////////// +// AES internal functions + +extern const SYMCRYPT_BLOCKCIPHER SymCryptAesBlockCipherNoOpt; + +VOID +SYMCRYPT_CALL +SymCryptAes4Sbox( + _In_reads_(4) PCBYTE pIn, + _Out_writes_(4) PBYTE pOut, + BOOL UseSimd ); + +VOID +SYMCRYPT_CALL +SymCryptAes4SboxC( + _In_reads_(4) PCBYTE pIn, + _Out_writes_(4) PBYTE pOut ); + +VOID +SYMCRYPT_CALL +SymCryptAes4SboxXmm( + _In_reads_(4) PCBYTE pIn, + _Out_writes_(4) PBYTE pOut ); + +VOID +SYMCRYPT_CALL +SymCryptAes4SboxNeon( + _In_reads_(4) PCBYTE pIn, + _Out_writes_(4) PBYTE pOut ); + +VOID +SYMCRYPT_CALL +SymCryptAesCreateDecryptionRoundKey( + _In_reads_(16) PCBYTE pEncryptionRoundKey, + _Out_writes_(16) PBYTE pDecryptionRoundKey, + BOOL UseSimd ); + +VOID +SYMCRYPT_CALL +SymCryptAesCreateDecryptionRoundKeyC( + _In_reads_(16) PCBYTE pEncryptionRoundKey, + _Out_writes_(16) PBYTE pDecryptionRoundKey ); + +VOID +SYMCRYPT_CALL +SymCryptAesCreateDecryptionRoundKeyXmm( + _In_reads_(16) PCBYTE pEncryptionRoundKey, + _Out_writes_(16) PBYTE pDecryptionRoundKey ); + +VOID +SYMCRYPT_CALL +SymCryptAesCreateDecryptionRoundKeyNeon( + _In_reads_(16) PCBYTE pEncryptionRoundKey, + _Out_writes_(16) PBYTE 
pDecryptionRoundKey ); + +VOID +SYMCRYPT_CALL +SymCryptAesEncryptC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesEncryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesEncryptXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesEncryptNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesDecryptC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesDecryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesDecryptXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesDecryptNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbSrc, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbDst ); + +VOID +SYMCRYPT_CALL +SymCryptAesEcbEncryptC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +VOID +SYMCRYPT_CALL +SymCryptAesEcbEncryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE 
pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +VOID +SYMCRYPT_CALL +SymCryptAesEcbEncryptXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesEcbEncryptNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesEcbDecryptC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcEncryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +VOID +SYMCRYPT_CALL +SymCryptAesCbcEncryptXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcEncryptNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcDecryptAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcDecryptXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcDecryptNeon( + 
_In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcMacXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCbcMacNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb64Asm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb64Xmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb64Neon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb32Xmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb32Neon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL 
+SymCryptXtsAesEncryptDataUnitC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitC( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptDataUnitAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitAsm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +// pbScratch must currently be 16B aligned +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptDataUnitXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE pbScratch, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +// pbScratch must currently be 16B aligned +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE pbScratch, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptDataUnitZmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE 
pbScratch, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitZmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE pbScratch, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptDataUnitYmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE pbScratch, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitYmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _Out_writes_( SYMCRYPT_AES_BLOCK_SIZE*16 ) PBYTE pbScratch, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptDataUnitNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptDataUnitNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsEncryptDataUnit( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptXtsDecryptDataUnit( + _In_ 
PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesGcmEncryptStitchedXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesGcmDecryptStitchedXmm( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +#define GCM_YMM_MINBLOCKS 16 + +// Caller must check cbData >= GCM_YMM_MINBLOCKS * SYMCRYPT_GCM_BLOCK_SIZE +VOID +SYMCRYPT_CALL +SymCryptAesGcmEncryptStitchedYmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +// Caller must check cbData >= GCM_YMM_MINBLOCKS * SYMCRYPT_GCM_BLOCK_SIZE +VOID +SYMCRYPT_CALL +SymCryptAesGcmDecryptStitchedYmm_2048( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID 
+SYMCRYPT_CALL +SymCryptAesGcmEncryptStitchedNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesGcmDecryptStitchedNeon( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( SYMCRYPT_GF128_FIELD_SIZE ) PCSYMCRYPT_GF128_ELEMENT expandedKeyTable, + _Inout_ PSYMCRYPT_GF128_ELEMENT pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesGcmEncryptPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptAesGcmDecryptPart( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptGcmEncryptPartTwoPass( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptGcmDecryptPartTwoPass( + _Inout_ PSYMCRYPT_GCM_STATE pState, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptCtrMsb32( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) + PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); +// +// SymCryptCtrMsb32 implements the CTR cipher mode with a 32-bit increment function. +// It is not intended to be used as-is, rather it is a building block for modes like GCM. 
+// See the description of SymCryptCtrMsb64 in symcrypt.h for more details. +// +// For now, this function is only intended for use with GCM, which specifies the use of a +// 32-bit increment function. It's only used in cases where we can't use one of the optimized +// implementations (i.e. on ARM32 or x86[-64] without AESNI). Therefore, unlike the 64-bit version, +// there are no optimized implementations of the CTR function to call. If we ever need this +// functionality for other block cipher modes, this function will need to be updated and we'll +// need to add an additional pointer to SYMCRYPT_BLOCKCIPHER for the optimized CTR function. + +VOID +SYMCRYPT_CALL +SymCryptAesCtrMsb32( + _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey, + _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbChainingValue, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ); + +// SymCryptAesCtrMsb32 is a dispatch function for the optimized AES CTR implementations that use +// a 32-bit counter function (currently only relevant to GCM). +// Per the annotations on the prototype: pbChainingValue is a SYMCRYPT_AES_BLOCK_SIZE-element buffer +// that is both read and written, pbSrc is read for cbData elements and pbDst is written for cbData elements. 
+ +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelHashProcess_serial( + _In_ PCSYMCRYPT_PARALLEL_HASH pParHash, + _Inout_updates_bytes_( nStates * pParHash->pHash->stateSize ) PVOID pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelHashProcess( + _In_ PCSYMCRYPT_PARALLEL_HASH pParHash, + _Inout_updates_bytes_( nStates * pParHash->pHash->stateSize ) PVOID pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch, + UINT32 maxParallel ); + +VOID +SYMCRYPT_CALL +SymCryptHashAppendInternal( + _In_ PCSYMCRYPT_HASH pHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState, + _In_reads_bytes_( cbData ) PCBYTE pbData, + SIZE_T cbData ); + +VOID +SYMCRYPT_CALL +SymCryptHashCommonPaddingMd4Style( + _In_ PCSYMCRYPT_HASH pHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState ); + + +extern const PCSYMCRYPT_PARALLEL_HASH SymCryptParallelSha256Algorithm; +extern const PCSYMCRYPT_PARALLEL_HASH SymCryptParallelSha384Algorithm; +extern const PCSYMCRYPT_PARALLEL_HASH SymCryptParallelSha512Algorithm; + +#define PAR_SCRATCH_ELEMENTS_256 (4+8+64) // # scratch elements our parallel SHA256 implementations need +#define PAR_SCRATCH_ELEMENTS_512 (4+8+80) // # scratch elements our parallel SHA512 implementations need + +// pScratch must be 32B aligned, as it is used as an array of __m256i +VOID +SYMCRYPT_CALL +SymCryptParallelSha256AppendBlocks_ymm( + _Inout_updates_( 8 ) PSYMCRYPT_SHA256_CHAINING_STATE * pChain, + _Inout_updates_( 8 ) PCBYTE * ppByte, + SIZE_T nBytes, + _Out_writes_( PAR_SCRATCH_ELEMENTS_256 * 32 ) PBYTE pScratch ); + +// pScratch must be 32B aligned, as it is used as an array of __m256i +VOID +SYMCRYPT_CALL +SymCryptParallelSha512AppendBlocks_ymm( + 
_Inout_updates_( 4 ) PSYMCRYPT_SHA512_CHAINING_STATE * pChain, + _Inout_updates_( 4 ) PCBYTE * ppByte, + SIZE_T nBytes, + _Out_writes_( PAR_SCRATCH_ELEMENTS_512 * 32 ) PBYTE pScratch ); + +extern const SYMCRYPT_HASH SymCryptMd2Algorithm_default; +extern const SYMCRYPT_HASH SymCryptMd4Algorithm_default; +extern const SYMCRYPT_HASH SymCryptMd5Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha1Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha224Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha256Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha384Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha512Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha512_224Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha512_256Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha3_224Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha3_256Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha3_384Algorithm_default; +extern const SYMCRYPT_HASH SymCryptSha3_512Algorithm_default; +extern const SYMCRYPT_HASH SymCryptShake128HashAlgorithm_default; +extern const SYMCRYPT_HASH SymCryptShake256HashAlgorithm_default; + + + +// Paddings used by various SHA-3 derived algorithms +#define SYMCRYPT_SHA3_PADDING_VALUE 0x06 // 01 10* padding +#define SYMCRYPT_SHAKE_PADDING_VALUE 0x1f // 11 11 10* padding +#define SYMCRYPT_CSHAKE_PADDING_VALUE 0x04 // 00 10* padding (used when N or S are non-empty strings) + +// +// Functions operating on the Keccak state +// + +VOID +SYMCRYPT_CALL +SymCryptKeccakPermute(_Inout_updates_(25) UINT64* pState); +// Keccak-f[1600] permutation + +VOID +SYMCRYPT_CALL +SymCryptKeccakInit(_Out_ PSYMCRYPT_KECCAK_STATE pState, UINT32 inputBlockSize, UINT8 padding); + +VOID +SYMCRYPT_CALL +SymCryptKeccakReset(_Out_ PSYMCRYPT_KECCAK_STATE pState); + +VOID +SYMCRYPT_CALL +SymCryptKeccakZeroAppendBlock(_Inout_ PSYMCRYPT_KECCAK_STATE pState); +// Zero pads the current block by invoking the permutation and 
setting +// pState->stateIndex to 0. + +VOID +SYMCRYPT_CALL +SymCryptKeccakAppend( + _Inout_ PSYMCRYPT_KECCAK_STATE pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData); +// Generic append function. + +VOID +SYMCRYPT_CALL +SymCryptKeccakExtract( + _Inout_ PSYMCRYPT_KECCAK_STATE pState, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult, + BOOLEAN bWipe); +// Generic extract function, no restriction on cbResult. +// bWipe denotes whether to wipe the Keccak state and initialize it +// for a new computation. + +VOID +SYMCRYPT_CALL +SymCryptKeccakStateExport( + SYMCRYPT_BLOB_TYPE type, + _In_ PCSYMCRYPT_KECCAK_STATE pState, + _Out_writes_bytes_(SYMCRYPT_KECCAK_STATE_EXPORT_SIZE) PBYTE pbBlob); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptKeccakStateImport( + SYMCRYPT_BLOB_TYPE type, + _Out_ PSYMCRYPT_KECCAK_STATE pState, + _In_reads_bytes_(SYMCRYPT_KECCAK_STATE_EXPORT_SIZE) PCBYTE pbBlob); + +VOID +SYMCRYPT_CALL +SymCryptKeccakAppendEncodeTimes8( + _Inout_ SYMCRYPT_KECCAK_STATE *pState, + UINT64 uValue, + BOOLEAN bLeftEncode); +// Appends the left-encoding of uValue * 8 to the state + +VOID +SYMCRYPT_CALL +SymCryptKeccakAppendEncodedString( + _Inout_ PSYMCRYPT_KECCAK_STATE pState, + _In_reads_(cbString) PCBYTE pbString, + SIZE_T cbString); +// Appends 'left_encode(cbString * 8) || pbString' to the state + +VOID +SYMCRYPT_CALL +SymCryptCShakeEncodeInputStrings( + _Inout_ PSYMCRYPT_KECCAK_STATE pState, + _In_reads_( cbFunctionNameString ) PCBYTE pbFunctionNameString, + SIZE_T cbFunctionNameString, + _In_reads_( cbCustomizationString ) PCBYTE pbCustomizationString, + SIZE_T cbCustomizationString); +// Process CShake input strings +// Appends byte_pad( encode_string( pbFunctionNameString ) || encode_string( pbCustomizationString ), pState->inputBlockSize ) + + + +VOID +SYMCRYPT_CALL +SymCryptFatalIntercept( UINT32 fatalCode ); + +extern const BYTE SymCryptSha256KATAnswer[32]; +extern const BYTE SymCryptSha384KATAnswer[48]; +extern const BYTE 
SymCryptSha512KATAnswer[64]; + +// +// Arithmetic +// + +#define SYMCRYPT_ASSERT_ASYM_ALIGNED( _p ) SYMCRYPT_ASSERT( ((SIZE_T)(_p) & (SYMCRYPT_ASYM_ALIGN_VALUE - 1)) == 0 ); + + +#define SYMCRYPT_FDEF_DIGIT_NUINT32 ((UINT32)(SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT32 ) )) + +#define SYMCRYPT_OBJ_NDIGITS( _p ) ((_p)->nDigits) +#define SYMCRYPT_OBJ_NBYTES( _p ) ((_p)->nDigits * SYMCRYPT_FDEF_DIGIT_SIZE) +#define SYMCRYPT_OBJ_NUINT32( _p ) ((_p)->nDigits * SYMCRYPT_FDEF_DIGIT_SIZE / sizeof( UINT32 )) + +#if SYMCRYPT_MS_VC +#define SYMCRYPT_MUL32x32TO64( _a, _b ) UInt32x32To64( (_a), (_b) ) +#elif SYMCRYPT_GNUC +#define SYMCRYPT_MUL32x32TO64( _a, _b ) ( (UINT64)(_a)*(UINT64)(_b) ) +#else + #error Unknown compiler +#endif +typedef VOID (SYMCRYPT_CALL * SYMCRYPT_MOD_BINARY_OP_FN)( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +typedef VOID (SYMCRYPT_CALL * SYMCRYPT_MOD_UNARY_OP_FN)( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +typedef SYMCRYPT_ERROR (SYMCRYPT_CALL * SYMCRYPT_MOD_UNARY_OP_FLAG_STATUS_FN)( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +typedef VOID (SYMCRYPT_CALL * SYMCRYPT_MOD_SET_POST_FN)( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +typedef PCUINT32 (SYMCRYPT_CALL * SYMCRYPT_MOD_PRE_GET_FN)( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +typedef VOID (SYMCRYPT_CALL * SYMCRYPT_MOD_COPY_FN)( + _In_ 
PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +typedef VOID (SYMCRYPT_CALL * SYMCRYPT_MODULUS_COPYFIXUP_FN)( + _In_ PCSYMCRYPT_MODULUS pmSrc, + _Out_ PSYMCRYPT_MODULUS pmDst ); + +typedef VOID (SYMCRYPT_CALL * SYMCRYPT_MODULUS_INIT_FN)( + _Inout_ PSYMCRYPT_MODULUS pmObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +// +// In the future we might want to implement a 'prepare divisor' for people who want to do one or more modular divisions. +// In EC projective coordinates you have a value stored as (X,Z) with X/Z being the actual value that needs to be exported. +// In Montgomery format, this is stored as (RX, RZ), and just doing RX * (1/RZ) gets you the value to be exported. +// There seem to be many tricks here to get some more speed; maybe we just need to define export functions for each +// point format and allow the Modulus to contain special optimizations. +// +// The SetPost function is the post-processing function of any SetValue operation. The SetValue operation will store the +// modElement in the normal integer format into the ModElement. The SetPost function post-processes it into the proper +// representation for that modulus. +// +// The PreGet function is the pre-processing function to any GetValue operation. It returns a pointer to the proper value +// stored in standard integer format. This pointer can either be into the ModElement itself, or into the scratch space. 
+// + +// SYMCRYPT_MODULAR_FUNCTIONS is the table of function pointers for one modular-arithmetic implementation; +// g_SymCryptModFns is an array of these tables. The struct holds 10 function pointers plus slack[6], +// i.e. 16 pointers in total: a C_ASSERT further down requires the struct size to be a power of 2, and +// SYMCRYPT_MOD_CALL adds the masked modulus 'type' field as a byte offset to the address of g_SymCryptModFns. +typedef struct _SYMCRYPT_MODULAR_FUNCTIONS { + SYMCRYPT_MOD_BINARY_OP_FN modAdd; + SYMCRYPT_MOD_BINARY_OP_FN modSub; + SYMCRYPT_MOD_UNARY_OP_FN modNeg; + SYMCRYPT_MOD_BINARY_OP_FN modMul; + SYMCRYPT_MOD_UNARY_OP_FN modSquare; + SYMCRYPT_MOD_UNARY_OP_FLAG_STATUS_FN modInv; + SYMCRYPT_MOD_SET_POST_FN modSetPost; + SYMCRYPT_MOD_PRE_GET_FN modPreGet; + SYMCRYPT_MODULUS_COPYFIXUP_FN modulusCopyFixup; // non-generic fixup after memcpy + SYMCRYPT_MODULUS_INIT_FN modulusInit; + PVOID slack[6]; // unused padding entries that round the table up to 16 pointers +} SYMCRYPT_MODULAR_FUNCTIONS; + +#define SYMCRYPT_MODULAR_FUNCTIONS_SIZE (sizeof( SYMCRYPT_MODULAR_FUNCTIONS ) ) + +extern const SYMCRYPT_MODULAR_FUNCTIONS g_SymCryptModFns[]; +extern const UINT32 g_SymCryptModFnsMask; + +// +// Table entry that contains the information about an implementation. +// Allows generic code to make the decision. 
+// The first entry in the table that is allowed is chosen; the last entry always matches everything. +// + +#define SYMCRYPT_MODULUS_FEATURE_MONTGOMERY 1 // Modulus is suitable for Montgomery processing +// #define SYMCRYPT_MODULUS_FEATURE_PSEUDO_MERSENNE 2 // Modulus is suitable for Pseudo-Mersenne processing +// #define SYMCRYPT_MODULUS_FEATURE_NISTP256 4 // Modulus is the NIST P256 curve prime +#define SYMCRYPT_MODULUS_FEATURE_NISTP384 8 // Modulus is the NIST P384 curve prime + +typedef struct _SYMCRYPT_MODULUS_TYPE_SELECTION_ENTRY +{ + UINT32 type; // Type value of this solution + SYMCRYPT_CPU_FEATURES cpuFeatures; // Required CPU features + UINT32 maxBits; // Max # bits that the actual value of the modulus is, 0 = no limit + UINT32 modulusFeatures; // Required features of the modulus +} SYMCRYPT_MODULUS_TYPE_SELECTION_ENTRY, *PSYMCRYPT_MODULUS_TYPE_SELECTION_ENTRY; +typedef const SYMCRYPT_MODULUS_TYPE_SELECTION_ENTRY* PCSYMCRYPT_MODULUS_TYPE_SELECTION_ENTRY; + +extern const SYMCRYPT_MODULUS_TYPE_SELECTION_ENTRY SymCryptModulusTypeSelections[]; // Array can be any size... 
+ + +// Check that the size is a power of 2 +C_ASSERT( (SYMCRYPT_MODULAR_FUNCTIONS_SIZE & (SYMCRYPT_MODULAR_FUNCTIONS_SIZE-1)) == 0 ); + +// The macro that we use to call modular functions +#define SYMCRYPT_MOD_CALL(v) ((SYMCRYPT_MODULAR_FUNCTIONS *)(( SYMCRYPT_FORCE_READ32( &(v)->type) & g_SymCryptModFnsMask) + (PBYTE)(&g_SymCryptModFns) ))-> + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_GENERIC {\ + &SymCryptFdefModAddGeneric,\ + &SymCryptFdefModSubGeneric,\ + &SymCryptFdefModNegGeneric,\ + &SymCryptFdefModMulGeneric,\ + &SymCryptFdefModSquareGeneric,\ + &SymCryptFdefModInvGeneric,\ + &SymCryptFdefModSetPostGeneric,\ + &SymCryptFdefModPreGetGeneric,\ + &SymCryptFdefModulusCopyFixupGeneric,\ + &SymCryptFdefModulusInitGeneric,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY {\ + &SymCryptFdefModAddGeneric,\ + &SymCryptFdefModSubGeneric,\ + &SymCryptFdefModNegGeneric,\ + &SymCryptFdefModMulMontgomery,\ + &SymCryptFdefModSquareMontgomery,\ + &SymCryptFdefModInvMontgomery,\ + &SymCryptFdefModSetPostMontgomery,\ + &SymCryptFdefModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdefModulusInitMontgomery,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_ARM64256 {\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModAdd256Asm,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModSub256Asm,\ + &SymCryptFdefModNegGeneric,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModMulMontgomery256Asm, \ + (SYMCRYPT_MOD_UNARY_OP_FN) &SymCryptFdefModSquareMontgomery256Asm, \ + &SymCryptFdefModInvMontgomery,\ + &SymCryptFdefModSetPostMontgomery,\ + &SymCryptFdefModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdefModulusInitMontgomery,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_ARM64P384 {\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModAdd384Asm,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModSub384Asm,\ + &SymCryptFdefModNegGeneric,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModMulMontgomeryP384Asm, \ + (SYMCRYPT_MOD_UNARY_OP_FN) 
&SymCryptFdefModSquareMontgomeryP384Asm, \ + &SymCryptFdef369ModInvMontgomery,\ + &SymCryptFdef369ModSetPostMontgomery,\ + &SymCryptFdef369ModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdef369ModulusInitMontgomery,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULX256 {\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModAddMulx256Asm,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModSub256Asm,\ + &SymCryptFdefModNegGeneric,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModMulMontgomeryMulx256Asm,\ + (SYMCRYPT_MOD_UNARY_OP_FN) &SymCryptFdefModSquareMontgomeryMulx256Asm,\ + &SymCryptFdefModInvMontgomery256,\ + &SymCryptFdefModSetPostMontgomeryMulx256,\ + &SymCryptFdefModPreGetMontgomery256,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdefModulusInitMontgomery256,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULXP256 {\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModAddMulx256Asm,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModSub256Asm,\ + &SymCryptFdefModNegGeneric,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModMulMontgomeryMulxP256Asm,\ + (SYMCRYPT_MOD_UNARY_OP_FN) &SymCryptFdefModSquareMontgomeryMulxP256Asm,\ + &SymCryptFdefModInvMontgomery256,\ + &SymCryptFdefModSetPostMontgomeryMulx256,\ + &SymCryptFdefModPreGetMontgomery256,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdefModulusInitMontgomery256,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULX384 {\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModAddMulx384Asm,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModSub384Asm,\ + &SymCryptFdefModNegGeneric,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModMulMontgomeryMulx384Asm,\ + (SYMCRYPT_MOD_UNARY_OP_FN) &SymCryptFdefModSquareMontgomeryMulx384Asm,\ + &SymCryptFdef369ModInvMontgomery,\ + &SymCryptFdefModSetPostMontgomeryMulx384,\ + &SymCryptFdef369ModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdef369ModulusInitMontgomery,\ +} + +#define 
SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULXP384 {\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModAddMulx384Asm,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModSub384Asm,\ + &SymCryptFdefModNegGeneric,\ + (SYMCRYPT_MOD_BINARY_OP_FN) &SymCryptFdefModMulMontgomeryMulxP384Asm,\ + (SYMCRYPT_MOD_UNARY_OP_FN) &SymCryptFdefModSquareMontgomeryMulxP384Asm,\ + &SymCryptFdef369ModInvMontgomery,\ + &SymCryptFdefModSetPostMontgomeryMulxP384,\ + &SymCryptFdef369ModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdef369ModulusInitMontgomery,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF369_MONTGOMERY {\ + &SymCryptFdef369ModAddGeneric,\ + &SymCryptFdef369ModSubGeneric,\ + &SymCryptFdefModNegGeneric,\ + &SymCryptFdef369ModMulMontgomery,\ + &SymCryptFdef369ModSquareMontgomery,\ + &SymCryptFdef369ModInvMontgomery,\ + &SymCryptFdef369ModSetPostMontgomery,\ + &SymCryptFdef369ModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdef369ModulusInitMontgomery,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULX {\ + &SymCryptFdefModAddGeneric,\ + &SymCryptFdefModSubGeneric,\ + &SymCryptFdefModNegGeneric,\ + &SymCryptFdefModMulMontgomeryMulx,\ + &SymCryptFdefModSquareMontgomeryMulx,\ + &SymCryptFdefModInvMontgomery,\ + &SymCryptFdefModSetPostMontgomery,\ + &SymCryptFdefModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdefModulusInitMontgomery,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY512 {\ + &SymCryptFdefModAddGeneric,\ + &SymCryptFdefModSubGeneric,\ + &SymCryptFdefModNegGeneric,\ + &SymCryptFdefModMulMontgomery512,\ + &SymCryptFdefModSquareMontgomery512,\ + &SymCryptFdefModInvMontgomery,\ + &SymCryptFdefModSetPostMontgomery,\ + &SymCryptFdefModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdefModulusInitMontgomery,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY1024 {\ + &SymCryptFdefModAddGeneric,\ + &SymCryptFdefModSubGeneric,\ + &SymCryptFdefModNegGeneric,\ + 
&SymCryptFdefModMulMontgomery1024,\ + &SymCryptFdefModSquareMontgomery1024,\ + &SymCryptFdefModInvMontgomery,\ + &SymCryptFdefModSetPostMontgomery,\ + &SymCryptFdefModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdefModulusInitMontgomery,\ +} + +#define SYMCRYPT_MOD_FUNCTIONS_FDEF_MONTGOMERY_MULX1024 {\ + &SymCryptFdefModAddGeneric,\ + &SymCryptFdefModSubGeneric,\ + &SymCryptFdefModNegGeneric,\ + &SymCryptFdefModMulMontgomeryMulx1024,\ + &SymCryptFdefModSquareMontgomeryMulx1024,\ + &SymCryptFdefModInvMontgomery,\ + &SymCryptFdefModSetPostMontgomery,\ + &SymCryptFdefModPreGetMontgomery,\ + &SymCryptFdefModulusCopyFixupMontgomery,\ + &SymCryptFdefModulusInitMontgomery,\ +} + +VOID +SYMCRYPT_CALL +SymCryptFdefMaskedCopy( + _In_reads_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PCBYTE pbSrc, + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbDst, + UINT32 nDigits, + UINT32 mask ); +// +// Copies Src to Dst under mask. +// Requirements: +// - mask == 0 or mask == 0xffffffff +// - cbData must be a multiple of the size of a digit, or a multiple of the size of a ModElement. +// - pbSrc and pbDst must be SYMCRYPT_ALIGNed +// if mask == 0 this function does nothing. +// if mask == 0xffffffff this function is a memcpy from Src to Dst. +// This function is side-channel safe; the value of mask is not revealed +// through the memory access patterns. +// + +VOID +SYMCRYPT_CALL +SymCryptFdefConditionalSwap( + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbSrc1, + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbSrc2, + UINT32 nDigits, + UINT32 cond ); + +// +// Swaps the bytes of Src1 with the bytes of Src2 under a condition. +// Requirements: +// - cond = 0 or cond = 1 . +// - cbData must be a multiple of the size of a digit, or a multiple of the size of a ModElement. +// - pbSrc1 and pbSrc2 must be SYMCRYPT_ALIGNed +// if cond == 0 this function does nothing. 
+// if cond == 1 this function swaps the bytes of Src1 with the bytes of Src2. +// This function is side-channel safe; the value of cond is not revealed +// through the memory access patterns. +// + +VOID +SYMCRYPT_CALL +SymCryptFdefClaimScratch( PBYTE pbScratch, SIZE_T cbScratch, SIZE_T cbMin ); + +UINT32 +SymCryptFdefDigitsFromBits( UINT32 nBits ); + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptFdefIntAllocate( UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefSizeofIntFromDigits( UINT32 nDigits ); + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptFdefIntCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ); + +VOID +SymCryptFdefIntCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ); + +VOID +SymCryptFdefIntMaskedCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Inout_ PSYMCRYPT_INT piDst, + UINT32 mask ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntConditionalCopy( + _In_ PCSYMCRYPT_INT piSrc, + _Inout_ PSYMCRYPT_INT piDst, + UINT32 cond ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntConditionalSwap( + _Inout_ PSYMCRYPT_INT piSrc1, + _Inout_ PSYMCRYPT_INT piSrc2, + UINT32 cond ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntBitsizeOfObject( _In_ PCSYMCRYPT_INT piSrc ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefNumberofDigitsFromInt( _In_ PCSYMCRYPT_INT piSrc ); + +SYMCRYPT_ERROR +SymCryptFdefIntCopyMixedSize( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntBitsizeOfValue( _In_ PCSYMCRYPT_INT piSrc ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntSetValueUint32( + UINT32 u32Src, + _Out_ PSYMCRYPT_INT piDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntSetValueUint64( + UINT64 u64Src, + _Out_ PSYMCRYPT_INT piDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefIntSetValue( + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + _Out_ PSYMCRYPT_INT piDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefIntGetValue( + _In_ PCSYMCRYPT_INT piSrc, + _Out_writes_bytes_(cbDst) 
PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntGetValueLsbits32( _In_ PCSYMCRYPT_INT piSrc ); + +UINT64 +SYMCRYPT_CALL +SymCryptFdefIntGetValueLsbits64( _In_ PCSYMCRYPT_INT piSrc ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntAddUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 u32Src2, + _Out_ PSYMCRYPT_INT piDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntAddSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntAddMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntSubUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 u32Src2, + _Out_ PSYMCRYPT_INT piDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntSubSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntSubMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntNeg( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ); + + +VOID +SYMCRYPT_CALL +SymCryptFdefIntMulPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T Exp, + _Out_ PSYMCRYPT_INT piDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntDivPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntShr1( + UINT32 highestBit, + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntModPow2( + _In_ PCSYMCRYPT_INT piSrc, + SIZE_T exp, + _Out_ PSYMCRYPT_INT piDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntGetBit( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 iBit ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntGetBits( + _In_ PCSYMCRYPT_INT piSrc, + UINT32 iBit, + UINT32 nBits ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntSetBits( + _In_ PSYMCRYPT_INT piDst, + UINT32 value, + UINT32 iBit, + 
UINT32 nBits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntIsEqualUint32( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ UINT32 u32Src2 ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntIsEqual( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2 ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntIsLessThan( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2 ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefIntMulUint32( + _In_ PCSYMCRYPT_INT piSrc1, + UINT32 Src2, + _Out_ PSYMCRYPT_INT piDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntMulSameSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +VOID +SYMCRYPT_CALL +SymCryptFdefIntSquare( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +VOID +SYMCRYPT_CALL +SymCryptFdefIntMulMixedSize( + _In_ PCSYMCRYPT_INT piSrc1, + _In_ PCSYMCRYPT_INT piSrc2, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptFdefDivisorAllocate( UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefSizeofDivisorFromDigits( UINT32 nDigits ); + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptFdefDivisorCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ); + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptFdefDivisorRetrieveHandle( _In_ PBYTE pbBuffer ); + +VOID +SymCryptFdefDivisorCopy( + _In_ PCSYMCRYPT_DIVISOR pdSrc, + _Out_ PSYMCRYPT_DIVISOR pdDst ); + +VOID +SymCryptFdefDivisorCopyFixup( + _In_ PCSYMCRYPT_DIVISOR pSrc, + _Out_ PSYMCRYPT_DIVISOR pDst ); + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptFdefIntFromDivisor( _In_ PSYMCRYPT_DIVISOR pdSrc ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntToDivisor( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_DIVISOR pdDst, + UINT32 totalOperations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE 
pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntDivMod( + _In_ PCSYMCRYPT_INT piSrc, + _In_ PCSYMCRYPT_DIVISOR pdDivisor, + _Out_opt_ PSYMCRYPT_INT piQuotient, + _Out_opt_ PSYMCRYPT_INT piRemainder, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawDivMod( + _In_reads_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pNum, + UINT32 nDigits, + _In_ PCSYMCRYPT_DIVISOR pdDivisor, + _Out_writes_opt_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pQuotient, + _Out_writes_opt_(SYMCRYPT_OBJ_NUINT32(pdDivisor)) PUINT32 pRemainder, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + + +PSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptFdefModulusAllocate( UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusFree( _Out_ PSYMCRYPT_MODULUS pmObj ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefSizeofModulusFromDigits( UINT32 nDigits ); + +PSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptFdefModulusCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 nDigits ); + +PSYMCRYPT_MODULUS +SYMCRYPT_CALL +SymCryptFdefModulusRetrieveHandle( _In_ PBYTE pbBuffer ); + + +VOID +SymCryptFdefModulusCopy( + _In_ PCSYMCRYPT_MODULUS pmSrc, + _Out_ PSYMCRYPT_MODULUS pmDst ); + +PSYMCRYPT_MODELEMENT +SYMCRYPT_CALL +SymCryptFdefModElementAllocate( _In_ PCSYMCRYPT_MODULUS pmMod ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModElementFree( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peObj ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefSizeofModElementFromModulus( PCSYMCRYPT_MODULUS pmMod ); + +PSYMCRYPT_MODELEMENT +SYMCRYPT_CALL +SymCryptFdefModElementCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + PCSYMCRYPT_MODULUS pmMod ); + +PSYMCRYPT_MODELEMENT +SYMCRYPT_CALL +SymCryptFdefModElementRetrieveHandle( _In_ PBYTE pbBuffer ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModElementWipe( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst 
); + +VOID +SymCryptFdefModElementCopy( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SymCryptFdefModElementMaskedCopy( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 mask ); + +PSYMCRYPT_DIVISOR +SYMCRYPT_CALL +SymCryptFdefDivisorFromModulus( _In_ PSYMCRYPT_MODULUS pmSrc ); + +VOID +SymCryptFdefModElementConditionalSwap( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peData1, + _Inout_ PSYMCRYPT_MODELEMENT peData2, + _In_ UINT32 cond ); + +PSYMCRYPT_INT +SYMCRYPT_CALL +SymCryptFdefIntFromModulus( _In_ PSYMCRYPT_MODULUS pmSrc ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntToModulus( + _In_ PCSYMCRYPT_INT piSrc, + _Out_ PSYMCRYPT_MODULUS pmDst, + UINT32 averageOperations, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefIntToModElement( + _In_ PCSYMCRYPT_INT piSrc, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModElementToIntGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_reads_bytes_( pmMod->nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) + PCUINT32 pSrc, + _Out_ PSYMCRYPT_INT piDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefRawSetValue( + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + _Out_writes_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst, + UINT32 nDigits ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModElementSetValueGeneric( + _In_reads_bytes_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + SYMCRYPT_NUMBER_FORMAT format, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL 
+SymCryptFdefModElementSetValueUint32Generic( + UINT32 value, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModElementSetValueNegUint32( + UINT32 value, + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefRawGetValue( + _In_reads_(nDigits * SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_bytes_(cbDst) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModElementGetValue( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_writes_bytes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst, + SYMCRYPT_NUMBER_FORMAT format, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefModElementIsEqual( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2 ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefModElementIsZero( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModAddGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModAddMulx256Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModAddMulx384Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModAdd256Asm( + _In_ PCSYMCRYPT_MODULUS 
pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModAdd384Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369ModAddGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSubGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369ModSubGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSub256Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSub384Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc1, + _In_ PCSYMCRYPT_MODELEMENT peSrc2, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModNegGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetPostGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetPostMontgomery( + 
_In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetPostMontgomeryMulx256( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetPostMontgomeryMulxP384( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369ModSetPostMontgomery( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +PCUINT32 +SYMCRYPT_CALL +SymCryptFdefModPreGetGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +PCUINT32 +SYMCRYPT_CALL +SymCryptFdefModPreGetMontgomery( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +PCUINT32 +SYMCRYPT_CALL +SymCryptFdefModPreGetMontgomery256( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +PCUINT32 +SYMCRYPT_CALL +SymCryptFdef369ModPreGetMontgomery( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusCopyFixupGeneric( + _In_ PCSYMCRYPT_MODULUS pmSrc, + _Out_ PSYMCRYPT_MODULUS pmDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusCopyFixupMontgomery( + _In_ PCSYMCRYPT_MODULUS pmSrc, + _Out_ PSYMCRYPT_MODULUS pmDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusInitGeneric( + _Inout_ PSYMCRYPT_MODULUS pmObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T 
cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusInitMontgomeryInternal( + _Inout_ PSYMCRYPT_MODULUS pmObj, + UINT32 nUint32Used, // R = 2^{32 * this parameter} + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusInitMontgomery( + _Inout_ PSYMCRYPT_MODULUS pmObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModulusInitMontgomery256( + _Inout_ PSYMCRYPT_MODULUS pmObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369ModulusInitMontgomery( + _Inout_ PSYMCRYPT_MODULUS pmObj, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawAdd( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 Dst, + UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawSub( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ); +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawSubUint32( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + UINT32 Src2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulGeneric( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomery( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst, + 
_Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomeryMulx256Asm( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomeryMulxP384Asm( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomery256Asm( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomeryP384Asm( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369ModMulMontgomery( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomeryMulx( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModMulMontgomeryMulx1024( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareGeneric( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomery( + _In_ 
PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomeryMulx256Asm( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomeryMulxP384Asm( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomery256Asm( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomeryP384Asm( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc1, + _In_ PCSYMCRYPT_MODELEMENT pSrc2, + _Out_ PSYMCRYPT_MODELEMENT pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369ModSquareMontgomery( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomeryMulx( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSquareMontgomeryMulx1024( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMul( + _In_reads_(nDigits1*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits1, + _In_reads_(nDigits2*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits2, + _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMulMulx( + 
_In_reads_(nDigits1*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits1, + _In_reads_(nDigits2*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits2, + _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMulMulx1024( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquare( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquareMulx( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquareMulx1024( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369RawMul( + _In_reads_(nDigits1*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits1, + _In_reads_(nDigits2*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits2, + _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawIsEqualUint32( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits, + _In_ UINT32 u32Src2 ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawNeg( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + UINT32 carryIn, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawMaskedAdd( + _Inout_updates_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 ) PUINT32 pAcc, + _In_reads_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 
) PCUINT32 pSrc, + UINT32 mask, + UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawMaskedSub( + _Inout_updates_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 ) PUINT32 pAcc, + _In_reads_( nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32 ) PCUINT32 pSrc, + UINT32 mask, + UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModDivPow2( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModDivSmallPow2( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _In_range_(1, NATIVE_BITS) UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModDivSmallPow2Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _In_range_(1, NATIVE_BITS) UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModDivSmallPow2Mulx( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peSrc, + _In_range_(1, NATIVE_BITS) UINT32 exp, + _Out_ PSYMCRYPT_MODELEMENT peDst ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModInvGeneric( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModInvMontgomery( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdefModInvMontgomery256( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptFdef369ModInvMontgomery( + _In_ PCSYMCRYPT_MODULUS pMod, + _In_ 
PCSYMCRYPT_MODELEMENT pSrc, + _Out_ PSYMCRYPT_MODELEMENT pDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptModExpGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_ PCSYMCRYPT_MODELEMENT peBase, + _In_ PCSYMCRYPT_INT piExp, + UINT32 nBitsExp, + UINT32 flags, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptModMultiExpGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _In_reads_( nBases ) PCSYMCRYPT_MODELEMENT * peBaseArray, + _In_reads_( nBases ) PCSYMCRYPT_INT * piExpArray, + UINT32 nBases, + UINT32 nBitsExp, + UINT32 flags, + _Out_ PSYMCRYPT_MODELEMENT peDst, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +VOID +SYMCRYPT_CALL +SymCryptFdefModSetRandomGeneric( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Out_ PSYMCRYPT_MODELEMENT peDst, + UINT32 flags, + _Out_writes_bytes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawAddUint32( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src1, + UINT32 Src2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 Dst, + UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawAddAsm( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 Dst, + UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdef369RawAddAsm( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 Src2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 Dst, + UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawSubAsm( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) 
PCUINT32 pSrc2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdef369RawSubAsm( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, + _Out_writes_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PUINT32 pDst, + UINT32 nDigits ); + +UINT32 +SYMCRYPT_CALL +SymCryptFdefRawIsLessThan( + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc1, + _In_reads_bytes_(nDigits * SYMCRYPT_FDEF_DIGIT_SIZE ) PCUINT32 pSrc2, + UINT32 nDigits ); + +VOID +SYMCRYPT_CALL +SymCryptFdefMaskedCopyAsm( + _In_reads_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PCBYTE pbSrc, + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbDst, + UINT32 nDigits, + UINT32 mask ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369MaskedCopyAsm( + _In_reads_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PCBYTE pbSrc, + _Inout_updates_bytes_( nDigits*SYMCRYPT_FDEF_DIGIT_SIZE ) PBYTE pbDst, + UINT32 nDigits, + UINT32 mask ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMulAsm( + _In_reads_(nDigits1*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits1, + _In_reads_(nDigits2*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits2, + _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquareAsm( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369RawMulAsm( + _In_reads_(nDigits1*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + UINT32 nDigits1, + _In_reads_(nDigits2*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits2, + _Out_writes_((nDigits1+nDigits2)*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMul512Asm( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + 
_In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquare512Asm( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawMul1024Asm( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc1, + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc2, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefRawSquare1024Asm( + _In_reads_(nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PCUINT32 pSrc, + UINT32 nDigits, + _Out_writes_(2*nDigits*SYMCRYPT_FDEF_DIGIT_NUINT32) PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefMontgomeryReduceAsm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefMontgomeryReduce256Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefMontgomeryReduce512Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefMontgomeryReduce1024Asm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369MontgomeryReduce( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdef369MontgomeryReduceAsm( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefMontgomeryReduceMulx( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ); + +VOID +SYMCRYPT_CALL +SymCryptFdefMontgomeryReduceMulx1024( + _In_ PCSYMCRYPT_MODULUS pmMod, + _Inout_ PUINT32 pSrc, + _Out_ PUINT32 pDst ); + + 
+//===================================================== +// Current state of FIPS tests for asymmetric keys +//===================================================== + +// -------------------------------------------------------------------- +// Key type | | +// & | Alg | Description +// Operation| | +// -------------------------------------------------------------------- +// Dlkey | DH | Requires use of named safe-prime group (otherwise we cannot perform private +// Generate | | key range check, or public key order validation). +// | | +// | | From SP800-56Ar3: +// | | Check private key is in the range [1, min(2^nBitsPriv, q)-1] +// | | nBitsPriv is specified either using a default value or using +// | | SymCryptDlkeySetPrivateKeyLength, such that 2s <= nBitsPriv <= nBitsOfQ. +// | | (s is the maximum security strength for a named safe-prime group as +// | | specified in SP800-56Arev3) +// | | Check public key is in the range [2, p-2] +// | | Check that (Public key)^q == 1 mod p +// | | +// | | FIPS 140-3 does not require a further PCT before first use of the key. +// |----------------------------------------------------------- +// | DSA | Requires use of a Dlgroup which has q, but is not a named safe-prime group. +// | | +// | | FIPS 186-4 and SP800-89 do not require DSA keypair owners to perform +// | | validation of keypairs they generate. +// | | +// | | FIPS 140-3 requires that a module generating a Dlkey keypair for use in DSA +// | | must perform a PCT on the keypair before first operational use in DSA. +// | | As the Dlgroups supported by FIPS are distinct for DH and DSA, we can perform +// | | this PCT on key generation without fear of adverse performance. +// -------------------------------------------------------------------- +// Dlkey | DH | Requires use of named safe-prime group (otherwise we cannot perform private +// SetValue | | key range check, or public key order validation).
+// | | +// | | From SP800-56Ar3: +// | | If importing a private key: +// | | Check private key is in the range [1, min(2^nBitsPriv, q)-1] +// | | nBitsPriv is specified either using a default value or using +// | | SymCryptDlkeySetPrivateKeyLength, such that 2s <= nBitsPriv <= nBitsOfQ. +// | | (s is the maximum security strength for a named safe-prime group as +// | | specified in SP800-56Arev3) +// | | +// | | If importing a public key: +// | | Check public key is in the range [2, p-2] +// | | Check that (Public key)^q == 1 mod p +// | | +// | | If importing both a private and public key, as above and also: +// | | Use the imported Private key to generate a Public key, and check the +// | | generated Public key is equal to the imported Public key. +// |----------------------------------------------------------- +// | DSA | Requires use of a Dlgroup which is not a named safe-prime group. +// | | +// | | FIPS 186-4 refers to SP800-89: +// | | If importing a public key: +// | | Check public key is in the range [2, p-2] +// | | Check that (Public key)^q == 1 mod p +// | | If importing a private and public key: +// | | Use the imported Private key to generate a Public key, and check the +// | | generated Public key is equal to the imported Public key.
+// -------------------------------------------------------------------- +// Eckey | ECDH | Requires use of a NIST prime Elliptic Curve (P224, P256, P384, or P521) +// SetRandom| | +// | | From SP800-56Ar3: +// | | Check private key is in range [1, GOrd-1] +// | | Check public key is nonzero, has coordinates in the underlying field, and is a +// | | point on the curve +// | | Check that GOrd*(Public key) == O +// | | +// | | FIPS 140-3 does not require a further PCT before first use of the key +// |---------------------------------------------------------- +// | ECDSA | Requires use of a NIST prime Elliptic Curve (P224, P256, P384, or P521) +// | | +// | | FIPS 186-4 and SP800-89 do not require ECDSA keypair owners to perform +// | | validation of keypairs they generate. +// | | +// | | FIPS 140-3 requires that a module generating an Eckey keypair for use in ECDSA +// | | must perform a PCT on the keypair before first operational use in ECDSA. +// | | As the Elliptic curves used in ECDH and ECDSA are the same, an Eckey may be +// | | used for both ECDH and ECDSA. We defer the ECDSA PCT from the EckeySetRandom +// | | call to the first use of EcDsaSign, or the first export of the keypair. +// -------------------------------------------------------------------- +// Eckey | ECDH | Requires use of a NIST prime Elliptic Curve (P224, P256, P384, or P521) +// SetValue | | +// | | From SP800-56Ar3: +// | | If importing a private key: +// | | Check private key is in range [1, GOrd-1] +// | | +// | | If importing a public key: +// | | Check public key is nonzero, has coordinates in the underlying field, and is +// | | a point on the curve +// | | Check that GOrd*(Public key) == O +// | | +// | | If importing a private and public key: +// | | Use the imported Private key to generate a Public key, and check the +// | | generated Public key is equal to the imported Public key. 
+// |---------------------------------------------------------- +// | ECDSA | Requires use of a NIST prime Elliptic Curve (P224, P256, P384, or P521) +// | | +// | | FIPS 186-4 refers to SP800-89: +// | | If importing a public key: +// | | SP800-89 refers to ANS X9.62. Assume same tests required as SP800-56Ar3: +// | | Check public key is nonzero, has coordinates in the underlying field, and is +// | | a point on the curve +// | | Check that GOrd*(Public key) == O +// | | +// | | If importing a private and public key: +// | | Use the imported Private key to generate a Public key, and check the +// | | generated Public key is equal to the imported Public key. +// -------------------------------------------------------------------- +// Rsakey | RSA | From FIPS 186-4 (SIGN) and SP800-56Br2 (ENCRYPT for key transport): +// Generate |ENCRYPT| Ensure p and q are in open range (2 ^ ((nBits - 1) / 2), 2 ^ (nBits / 2)) +// | and | Ensure |p-q| > 2^((nBits/2)-100) +// | RSA | Ensure e is coprime with (p-1) and (q-1) +// | SIGN | Ensure d is in range [2 ^ (nBits/2) + 1, LCM(p-1,q-1) - 1] +// | | Ensure that d*e == 1 mod LCM(p-1,q-1) +// | | +// | | FIPS 140-3 requires that a module generating an Rsakey keypair for use in an +// | | RSA algorithm must perform a PCT on the keypair before first operational use. +// | | +// | | For ENCRYPT, SP800-56Br2 specifies the PCT to perform as part of key +// | | generation is: +// | | Check (m^e)^d == m mod n for some m in range [2, n-2] +// | | +// | | For SIGN, FIPS 186-4 refers to SP800-89, which does not clearly specify a +// | | PCT, but does specify that for an owner to have assurance of Private Key +// | | Possession they can sign a message with the private key and validate it with +// | | the public key to check they correspond to each other.
Notably, this +// | | internally will verify (m^d)^e == m mod n for some m (along with testing +// | | additional padding logic) +// | | +// | | FIPS 140-2 explicitly says that only one PCT is required if a keypair may be +// | | used in either algorithm, with the module able to choose the PCT. +// | | FIPS 140-3 does not say anything specific about only requiring one PCT, but +// | | given that mathematically (m^e)^d == (m^ed) == (m^d)^e mod n, our +// | | current understanding is that the SIGN PCT works in lieu of the ENCRYPT PCT +// | | +// | | NOTE: FIPS 140-3 explicitly says that an RSA PCT cannot be used in lieu of an +// | | RSA algorithm selftest (CAST) +// -------------------------------------------------------------------- +// Rsakey | RSA | If importing a keypair (primes and modulus): +// SetValue |ENCRYPT| SP800-56Br2 specifies: +// | | Check (m^e)^d mod n == m for some m in range [2, n-2] +// | | Check n == p*q +// | | Check p and q are in open range (2 ^ ((nBits - 1) / 2), 2 ^ (nBits / 2)) +// | | Check |p-q| > 2^((nBits/2)-100) +// | | Check e is coprime with (p-1) and (q-1) +// | | Check p and q are probably prime +// | | Check d is in range [2 ^ (nBits/2) + 1, LCM(p-1,q-1) - 1] +// | | Check that d*e == 1 mod LCM(p-1,q-1) +// | | +// | | If importing a public key (only modulus): +// | | SP800-56Br2, refers to SP800-89 which details the following Partial Public Key +// | | Validation: +// | | Check n is odd +// | | Check n is not a prime or a power of a prime +// | | Check n has no factors smaller than 752 +// |---------------------------------------------------------- +// | RSA | FIPS 186-4 refers only to SP800-89 which has weaker tests for a keypair than +// | SIGN | SP800-56Br2 (i.e. success at SP800-56Br2 tests implies success in SP800-89) +// | | The current strategy will be to always perform the stronger tests. 
+// -------------------------------------------------------------------- + +// Macro for executing a Cryptographic Algorithm Self-Test (CAST) and setting the corresponding +// flag. These selftests must be run once per algorithm before the algorithm is used. For algorithms +// like hashing and symmetric encryption which have a low performance cost, we run the CASTs when +// the module is loaded. For asymmetric algorithms, we defer the CASTs until the first use of the +// algorithm; hence we need flags to keep track of which CASTs have been run. +#define SYMCRYPT_RUN_SELFTEST_ONCE(AlgorithmSelftestFunction, AlgorithmSelftestFlag) \ +if( ( g_SymCryptFipsSelftestsPerformed & AlgorithmSelftestFlag ) == 0 ) \ +{ \ + AlgorithmSelftestFunction( ); \ + SYMCRYPT_ATOMIC_OR32_PRE_RELAXED( &g_SymCryptFipsSelftestsPerformed, AlgorithmSelftestFlag ); \ +} + +// Macros for executing a pairwise consistency test on a key and setting the per-key selftest flag. +// Typically PCTs must be run for each key before the key is first used or exported, but the +// specific requirements vary between algorithms. +// +// Note that a PCT is not considered a CAST and thus does not satisfy the aforementioned requirement +// for algorithm selftests. 
+#define SYMCRYPT_RUN_KEY_GEN_PCT(KeySelftestFunction, Key, KeySelftestFlag) \ +if( ( Key->fAlgorithmInfo & (KeySelftestFlag | SYMCRYPT_FLAG_KEY_NO_FIPS) ) == 0 ) \ +{ \ + /* PCT should never fail on key generation - FIPS assert that it does not */ \ + SYMCRYPT_FIPS_ASSERT( KeySelftestFunction( Key ) == SYMCRYPT_NO_ERROR ); \ + SYMCRYPT_ATOMIC_OR32_PRE_RELAXED(&Key->fAlgorithmInfo, KeySelftestFlag); \ +} + +// Macro to check flag used in fAlgorithmInfo is non-zero and a power of 2 +#define CHECK_ALGORITHM_INFO_FLAG_POW2( flag ) \ + C_ASSERT( (flag != 0) && ((flag & (flag-1)) == 0) ); + +// Macro to check flags used together in fAlgorithmInfo are distinct +#define CHECK_ALGORITHM_INFO_FLAGS_DISTINCT( flag0, flag1, flag2, flag3, flag4 ) \ + C_ASSERT( (flag0 < flag1) && (flag1 < flag2) && (flag2 < flag3) && (flag3 < flag4) ); + +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_PCT_DSA); +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_PCT_ECDSA); +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_PCT_RSA_SIGN); + +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_FLAG_KEY_NO_FIPS); +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION); + +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_FLAG_DLKEY_DSA); +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_FLAG_DLKEY_DH); + +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_FLAG_ECKEY_ECDSA); +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_FLAG_ECKEY_ECDH); + +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_FLAG_RSAKEY_SIGN); +CHECK_ALGORITHM_INFO_FLAG_POW2(SYMCRYPT_FLAG_RSAKEY_ENCRYPT); + +CHECK_ALGORITHM_INFO_FLAGS_DISTINCT(SYMCRYPT_PCT_DSA, SYMCRYPT_FLAG_KEY_NO_FIPS, SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION, SYMCRYPT_FLAG_DLKEY_DSA, SYMCRYPT_FLAG_DLKEY_DH); +CHECK_ALGORITHM_INFO_FLAGS_DISTINCT(SYMCRYPT_PCT_ECDSA, SYMCRYPT_FLAG_KEY_NO_FIPS, SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION, SYMCRYPT_FLAG_ECKEY_ECDSA, SYMCRYPT_FLAG_ECKEY_ECDH); +CHECK_ALGORITHM_INFO_FLAGS_DISTINCT(SYMCRYPT_PCT_RSA_SIGN, SYMCRYPT_FLAG_KEY_NO_FIPS, SYMCRYPT_FLAG_KEY_MINIMAL_VALIDATION, SYMCRYPT_FLAG_RSAKEY_SIGN, 
SYMCRYPT_FLAG_RSAKEY_ENCRYPT); + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptRsaSignVerifyPct( PCSYMCRYPT_RSAKEY pkRsakey ); +// +// FIPS pairwise consistency test for RSA sign/verify. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptDsaPct( PCSYMCRYPT_DLKEY pkDlkey ); +// +// FIPS pairwise consistency test for DSA sign/verify. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptEcDsaPct( PCSYMCRYPT_ECKEY pkEckey ); +// +// FIPS pairwise consistency test for ECDSA sign/verify. +// + +typedef struct _SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS { + SYMCRYPT_DLGROUP_DH_SAFEPRIMETYPE eDhSafePrimeType; + + PCBYTE pcbPrimeP; + + UINT32 nBitsOfP; // nBitsOfQ == nBitsOfP-1 + UINT32 nMinBitsPriv; // nMinBitsPriv == 2s + // s is the maximum security strength supported by the group based on SP800-56Arev3 + UINT32 nDefaultBitsPriv; // nBitsOfQ >= nDefaultBitsPriv >= nMinBitsPriv + // nDefaultBitsPriv will be the default value of nBitsPriv for a Dlkey in this Dlgroup + // nBitsPriv is the maximum length of the private key +} SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS; +typedef const SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS * PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS; +// +// SYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS is used to specify all the parameters needed for creation +// of a Dlgroup based on a safe-prime group (i.e. p = 2q+1, and g = 2). 
// Currently this is used exclusively internally, and the interface for explicitly specifying use of
// safe-prime group in SymCrypt is to use
// NOTE(review): the sentence above is truncated in the original; presumably it names
// SymCryptDlgroupSetValueSafePrime (referenced further below) - confirm.

// Internally supported Safe Prime groups
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp2048;
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp3072;
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp4096;
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp6144;
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsModp8192;

extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe2048;
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe3072;
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe4096;
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe6144;
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptDlgroupDhSafePrimeParamsffdhe8192;

// Number of parameter sets declared above (5 Modp + 5 ffdhe); sizes the array below.
#define SYMCRYPT_DH_SAFEPRIME_GROUP_COUNT (10)

// Note, we rely on the ordering of the parameters from smallest to largest within each named set of
// safe-prime groups as we iterate through them assuming this order in SymCryptDlgroupSetValueSafePrime
extern const PCSYMCRYPT_DLGROUP_DH_SAFEPRIME_PARAMS SymCryptNamedSafePrimeGroups[SYMCRYPT_DH_SAFEPRIME_GROUP_COUNT];

//
// Definitions for ECurve dispatch functions
//
// Each function-pointer type below is the type of one slot in SYMCRYPT_ECURVE_FUNCTIONS.
// All of them except PSYMCRYPT_ECURVE_FILL_SCRATCH_SPACES_FUNC take a caller-supplied scratch
// buffer as their last two parameters (pbScratch, cbScratch).
//
typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_SET_ZERO_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_SET_DISTINGUISHED_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_SET_RANDOM_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_INT       piScalar,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

typedef UINT32 (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_ISEQUAL_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch);

typedef UINT32 (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_ONCURVE_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

typedef UINT32 (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_ISZERO_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_ADD_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_ADD_DIFF_NONZERO_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_DOUBLE_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

// Operates in place on poSrc (annotated _Inout_); there is no separate destination.
typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_NEGATE_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Inout_ PSYMCRYPT_ECPOINT   poSrc,
            UINT32              mask,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

// poSrc is annotated _In_opt_, i.e. it may be NULL.
typedef SYMCRYPT_ERROR (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_SCALAR_MUL_FUNC) (
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_INT      piScalar,
    _In_opt_
            PCSYMCRYPT_ECPOINT  poSrc,
            UINT32              flags,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

// piSrcScalarArray and poSrcEcpointArray are both annotated as holding nPoints entries.
typedef SYMCRYPT_ERROR (SYMCRYPT_CALL * PSYMCRYPT_ECPOINT_MULTI_SCALAR_MUL_FUNC) (
    _In_ PCSYMCRYPT_ECURVE pCurve,
    _In_reads_( nPoints ) PCSYMCRYPT_INT * piSrcScalarArray,
    _In_reads_( nPoints ) PCSYMCRYPT_ECPOINT * poSrcEcpointArray,
    UINT32 nPoints,
    UINT32 flags,
    _Out_ PSYMCRYPT_ECPOINT poDst,
    _Out_writes_bytes_( cbScratch ) PBYTE pbScratch,
    SIZE_T cbScratch );

typedef VOID (SYMCRYPT_CALL * PSYMCRYPT_ECURVE_FILL_SCRATCH_SPACES_FUNC) (
    _Inout_ PSYMCRYPT_ECURVE pCurve );


// Table of curve-type-specific operations: one pointer per dispatch type declared above.
// 13 function pointers plus slack[3] give 16 pointer-sized slots, which is what lets the
// power-of-2 size assertion below hold (assuming function and data pointers have the same
// size on the target - TODO confirm).
typedef struct _SYMCRYPT_ECURVE_FUNCTIONS
{
    PSYMCRYPT_ECPOINT_SET_ZERO_FUNC             setZeroFunc;
    PSYMCRYPT_ECPOINT_SET_DISTINGUISHED_FUNC    setDistinguishedFunc;
    PSYMCRYPT_ECPOINT_SET_RANDOM_FUNC           setRandomFunc;
    PSYMCRYPT_ECPOINT_ISEQUAL_FUNC              isEqualFunc;
    PSYMCRYPT_ECPOINT_ISZERO_FUNC               isZeroFunc;
    PSYMCRYPT_ECPOINT_ONCURVE_FUNC              onCurveFunc;
    PSYMCRYPT_ECPOINT_ADD_FUNC                  addFunc;
    PSYMCRYPT_ECPOINT_ADD_DIFF_NONZERO_FUNC     addDiffFunc;
    PSYMCRYPT_ECPOINT_DOUBLE_FUNC               doubleFunc;
    PSYMCRYPT_ECPOINT_NEGATE_FUNC               negateFunc;
    PSYMCRYPT_ECPOINT_SCALAR_MUL_FUNC           scalarMulFunc;
    PSYMCRYPT_ECPOINT_MULTI_SCALAR_MUL_FUNC     multiScalarMulFunc;
    PSYMCRYPT_ECURVE_FILL_SCRATCH_SPACES_FUNC   fillScratchSpacesFunc;
    PVOID                                       slack[3];   // padding slots, see note above
} SYMCRYPT_ECURVE_FUNCTIONS, *PSYMCRYPT_ECURVE_FUNCTIONS;
typedef const SYMCRYPT_ECURVE_FUNCTIONS *PCSYMCRYPT_ECURVE_FUNCTIONS;

#define SYMCRYPT_ECURVE_FUNCTIONS_SIZE (sizeof( SYMCRYPT_ECURVE_FUNCTIONS ) )

// Check that the size is a power of 2
C_ASSERT( (SYMCRYPT_ECURVE_FUNCTIONS_SIZE & (SYMCRYPT_ECURVE_FUNCTIONS_SIZE-1)) == 0 );

//
// Functions for each type of curve
//
// The prototypes in the three sections below have the same parameter lists as the dispatch
// typedefs above (e.g. SymCryptShortWeierstrassAdd matches PSYMCRYPT_ECPOINT_ADD_FUNC).
//

//--------------------------------------------------------
//--------- Short Weierstrass ----------------------------
//--------------------------------------------------------

extern const PCSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION SymCryptEcurveParamsV2ExtensionShortWeierstrass;

// NOTE(review): pCurve is annotated _In_ here (and in the other *FillScratchSpaces prototypes
// taking a single argument) while PSYMCRYPT_ECURVE_FILL_SCRATCH_SPACES_FUNC uses _Inout_.
VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassFillScratchSpaces( _In_ PSYMCRYPT_ECURVE pCurve );

VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassSetZero(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassSetDistinguished(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

UINT32
SYMCRYPT_CALL
SymCryptShortWeierstrassIsEqual(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

UINT32
SYMCRYPT_CALL
SymCryptShortWeierstrassIsZero(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

UINT32
SYMCRYPT_CALL
SymCryptShortWeierstrassOnCurve(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassAdd(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassAddDiffNonZero(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassDouble(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassNegate(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Inout_ PSYMCRYPT_ECPOINT   poSrc,
            UINT32              mask,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

// Same signature as SymCryptShortWeierstrassDouble.
// Presumably a doubling variant specialized for curves with a = -3 - TODO confirm.
VOID
SYMCRYPT_CALL
SymCryptShortWeierstrassDoubleSpecializedAm3(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

//--------------------------------------------------------
//--------- Twisted Edwards ------------------------------
//--------------------------------------------------------

extern const PCSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION SymCryptEcurveParamsV2ExtensionTwistedEdwards;

VOID
SYMCRYPT_CALL
SymCryptTwistedEdwardsFillScratchSpaces( _In_ PSYMCRYPT_ECURVE pCurve );

VOID
SYMCRYPT_CALL
SymCryptTwistedEdwardsSetDistinguished(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch);

VOID
SYMCRYPT_CALL
SymCryptTwistedEdwardsAdd(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

VOID
SYMCRYPT_CALL
SymCryptTwistedEdwardsAddDiffNonZero(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

VOID
SYMCRYPT_CALL
SymCryptTwistedEdwardsDouble(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch);

UINT32
SYMCRYPT_CALL
SymCryptTwistedEdwardsIsEqual(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch);

UINT32
SYMCRYPT_CALL
SymCryptTwistedEdwardsOnCurve(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch);

UINT32
SYMCRYPT_CALL
SymCryptTwistedEdwardsIsZero(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch);

VOID
SYMCRYPT_CALL
SymCryptTwistedEdwardsSetZero(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch);

VOID
SYMCRYPT_CALL
SymCryptTwistedEdwardsNegate(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Inout_ PSYMCRYPT_ECPOINT   poSrc,
            UINT32              mask,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

//--------------------------------------------------------
//--------- Montgomery -----------------------------------
//--------------------------------------------------------

extern const PCSYMCRYPT_ECURVE_PARAMS_V2_EXTENSION SymCryptEcurveParamsV2ExtensionMontgomery;

VOID
SYMCRYPT_CALL
SymCryptMontgomeryFillScratchSpaces( _In_ PSYMCRYPT_ECURVE pCurve );

VOID
SYMCRYPT_CALL
SymCryptMontgomerySetDistinguished(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

UINT32
SYMCRYPT_CALL
SymCryptMontgomeryIsEqual(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc1,
    _In_    PCSYMCRYPT_ECPOINT  poSrc2,
            UINT32              flags,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch);

UINT32
SYMCRYPT_CALL
SymCryptMontgomeryIsZero(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_ECPOINT  poSrc,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptMontgomeryPointScalarMul(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_INT      piScalar,
    _In_opt_
            PCSYMCRYPT_ECPOINT  poSrc,
            UINT32              flags,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

//--------------------------------------------------------
//--------- Generic multiplication-related functions -----
//--------------------------------------------------------

VOID
SYMCRYPT_CALL
SymCryptOfflinePrecomputation(
    _In_    PSYMCRYPT_ECURVE    pCurve,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

// Same signature as PSYMCRYPT_ECPOINT_SCALAR_MUL_FUNC.
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptEcpointScalarMulFixedWindow(
    _In_    PCSYMCRYPT_ECURVE   pCurve,
    _In_    PCSYMCRYPT_INT      piScalar,
    _In_opt_
            PCSYMCRYPT_ECPOINT  poSrc,
            UINT32              flags,
    _Out_   PSYMCRYPT_ECPOINT   poDst,
    _Out_writes_bytes_( cbScratch )
            PBYTE               pbScratch,
            SIZE_T              cbScratch );

// Same signature as PSYMCRYPT_ECPOINT_MULTI_SCALAR_MUL_FUNC.
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptEcpointMultiScalarMulWnafWithInterleaving(
    _In_ PCSYMCRYPT_ECURVE pCurve,
    _In_reads_( nPoints ) PCSYMCRYPT_INT * piSrcScalarArray,
    _In_reads_( nPoints ) PCSYMCRYPT_ECPOINT * poSrcEcpointArray,
    UINT32 nPoints,
    UINT32 flags,
    _Out_ PSYMCRYPT_ECPOINT poDst,
    _Out_writes_bytes_( cbScratch ) PBYTE pbScratch,
    SIZE_T cbScratch );

// Same signature as PSYMCRYPT_ECPOINT_SET_RANDOM_FUNC.
VOID
SYMCRYPT_CALL
SymCryptEcpointGenericSetRandom(
    _In_ PCSYMCRYPT_ECURVE pCurve,
    _Out_ PSYMCRYPT_INT piScalar,
    _Out_ PSYMCRYPT_ECPOINT poDst,
    _Out_writes_bytes_( cbScratch ) PBYTE pbScratch,
    SIZE_T cbScratch );

VOID
SYMCRYPT_CALL
SymCryptEcurveFillScratchSpaces(
    _Inout_ PSYMCRYPT_ECURVE pCurve);
//--------------------------------------------------------
//--------------------------------------------------------

// Table with the number of field elements for each point format (in ecpoint.c)
extern const UINT32 SymCryptEcpointFormatNumberofElements[4];

UINT32
SYMCRYPT_CALL
SymCryptSizeofEcpointEx(
    UINT32 cbModElement,
    UINT32 numOfCoordinates );


PCSYMCRYPT_TRIALDIVISION_CONTEXT
SYMCRYPT_CALL
SymCryptFdefCreateTrialDivisionContext( UINT32 nDigits );

UINT32
SYMCRYPT_CALL
SymCryptFdefIntFindSmallDivisor(
    _In_ PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext,
    _In_ PCSYMCRYPT_INT piSrc,
    _Out_writes_bytes_( cbScratch ) PBYTE pbScratch,
    SIZE_T cbScratch );

VOID
SYMCRYPT_CALL
SymCryptFdefFreeTrialDivisionContext( PCSYMCRYPT_TRIALDIVISION_CONTEXT pContext );

// NOTE(review): unlike the neighbouring prototypes this one carries no SYMCRYPT_CALL;
// confirm that this matches the definition.
UINT64
SymCryptInverseMod2e64( UINT64 m );


//--------------------------------------------------------
//--------------------------------------------------------

// Helper function for wiping the Ec key's private state (e.g. for use in layers such as composites)
VOID
SYMCRYPT_CALL
SymCryptEckeyWipePrivateState(
    _Inout_ PSYMCRYPT_ECKEY pkEckey );

// Recoding algorithms
//
// In all three routines the output arrays are annotated as holding nRecodedDigits entries.
// piK is _Inout_ in the first two routines and _In_ (read-only) in the "Positive" variant,
// which also has no sigofKIs output.
VOID
SYMCRYPT_CALL
SymCryptFixedWindowRecoding(
            UINT32          W,
    _Inout_ PSYMCRYPT_INT   piK,
    _Inout_ PSYMCRYPT_INT   piTmp,
    _Out_writes_( nRecodedDigits )
            PUINT32         absofKIs,
    _Out_writes_( nRecodedDigits )
            PUINT32         sigofKIs,
            UINT32          nRecodedDigits );

VOID
SYMCRYPT_CALL
SymCryptWidthNafRecoding(
            UINT32          W,
    _Inout_ PSYMCRYPT_INT   piK,
    _Out_writes_( nRecodedDigits )
            PUINT32         absofKIs,
    _Out_writes_( nRecodedDigits )
            PUINT32         sigofKIs,
            UINT32          nRecodedDigits );

VOID
SYMCRYPT_CALL
SymCryptPositiveWidthNafRecoding(
            UINT32          W,
    _In_    PCSYMCRYPT_INT  piK,
            UINT32          nBitsExp,
    _Out_writes_( nRecodedDigits )
            PUINT32         absofKIs,
            UINT32          nRecodedDigits );

// M-LWE: Module Learning-With-Errors (ML-KEM, ML-DSA)
//
// ML-KEM (also known as Kyber) and ML-DSA (also known as Dilithium) are Post-Quantum algorithms
// based on the Learning-With-Errors problem over Module Lattices (or the hardness of the M-LWE
// problem).
//
// A Module is a Vector Space over a Ring. That is, elements of the vector spaces are elements in
// the underlying ring.
+// We refer to Module as MLWE in the below types to avoid naming confusion with Module as in +// "FIPS module". Though technically components acting on MLWE types could be used outside of the +// MLWE problem, these types are SymCrypt-internal, and are only currently intended for use in +// these MLWE-based algorithms. +// +// In ML-KEM and ML-DSA, Polynomial Rings are used. That is, a ring defined over polynomials. +// For both schemes, the polynomial ring is defined modulo the polynomial (X^256 + 1). This means +// there is a representative of each polynomial ring element with 256 coefficients +// (c_255*X^255 + c_254*X^254 + ... + c_0). The coefficients themselves are modulo a small prime +// in both schemes. For ML-KEM the small prime is 3329 (12-bits), and for ML-DSA the small prime +// is 8380417 (23-bits). +// Additionally, for both schemes there is a Number Theoretic Transform (NTT) which maps polynomial +// ring elements to a corresponding ring for efficient multiplication. +// The in-memory representation of a polynomial ring element uses the same struct regardless of +// whether it is in standard form, or the NTT form. For brevity we tend to refer to polynomial +// ring elements as PolyElements. 
//
// Number of coefficients in one polynomial ring element (the ring is taken modulo X^256 + 1).
#define SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS (256)

// MLWE internal function definitions are in their own headers
#include "sc_lib_mlkem.h"
#include "sc_lib_mldsa.h"

//
// Common Composite Definitions
//

// Identifiers for the curves obtainable through SymCryptGetCachedEcurve.
// SYMCRYPT_CACHED_ECURVE_ID_COUNT is the number of real identifiers, not a curve itself.
typedef enum {
    SYMCRYPT_CACHED_ECURVE_ID_NIST_P256 = 0,
    SYMCRYPT_CACHED_ECURVE_ID_NIST_P384,
    SYMCRYPT_CACHED_ECURVE_ID_CURVE_25519,
    SYMCRYPT_CACHED_ECURVE_ID_COUNT
} SYMCRYPT_CACHED_ECURVE_ID, *PSYMCRYPT_CACHED_ECURVE_ID;

PCSYMCRYPT_ECURVE
SYMCRYPT_CALL
SymCryptGetCachedEcurve(
    SYMCRYPT_CACHED_ECURVE_ID curveId );

// Encoded EC public key sizes in bytes, per cached curve.
#define SYMCRYPT_COMPOSITE_SIZEOF_ENCODED_EC_PUBLIC_KEY_P256        (65)
#define SYMCRYPT_COMPOSITE_SIZEOF_ENCODED_EC_PUBLIC_KEY_P384        (97)
#define SYMCRYPT_COMPOSITE_SIZEOF_ENCODED_EC_PUBLIC_KEY_CURVE_25519 (32)

// Largest of the three public key sizes above (P384).
#define SYMCRYPT_COMPOSITE_SIZEOF_MAX_ENCODED_EC_PUBLIC_KEY SYMCRYPT_COMPOSITE_SIZEOF_ENCODED_EC_PUBLIC_KEY_P384

// Encoded EC private key sizes in bytes, per cached curve.
#define SYMCRYPT_COMPOSITE_SIZEOF_ENCODED_EC_PRIVATE_KEY_P256        (51)
#define SYMCRYPT_COMPOSITE_SIZEOF_ENCODED_EC_PRIVATE_KEY_P384        (64)
#define SYMCRYPT_COMPOSITE_SIZEOF_ENCODED_EC_PRIVATE_KEY_CURVE_25519 (32)

UINT32
SYMCRYPT_CALL
SymCryptCompositeGetSizeOfEncodedEcSk(
    SYMCRYPT_CACHED_ECURVE_ID curveId );

UINT32
SYMCRYPT_CALL
SymCryptCompositeGetSizeOfEncodedEcPk(
    SYMCRYPT_CACHED_ECURVE_ID curveId );

// The four routines below read (GetValue) or write (SetValue) the composite encoding of an
// EC private (Sk) or public (Pk) key; the GetValue variants write into pbDst/cbDst and the
// SetValue variants read from pbSrc/cbSrc.
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptEckeyGetValueCompositeEncodingSk(
    _In_ PCSYMCRYPT_ECKEY pEckey,
    SYMCRYPT_CACHED_ECURVE_ID curveId,
    _Out_writes_bytes_( cbDst ) PBYTE pbDst,
    SIZE_T cbDst );

SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptEckeyGetValueCompositeEncodingPk(
    _In_ PCSYMCRYPT_ECKEY pEckey,
    SYMCRYPT_CACHED_ECURVE_ID curveId,
    _Out_writes_bytes_( cbDst ) PBYTE pbDst,
    SIZE_T cbDst );

SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptEckeySetValueCompositeEncodingPk(
    _In_ SYMCRYPT_CACHED_ECURVE_ID curveId,
    _In_reads_bytes_( cbSrc ) PCBYTE pbSrc,
    SIZE_T cbSrc,
    UINT32 flags,
    _Inout_ PSYMCRYPT_ECKEY pEckey );

SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptEckeySetValueCompositeEncodingSk(
    SYMCRYPT_CACHED_ECURVE_ID curveId,
    _In_reads_bytes_( cbSrc ) PCBYTE pbSrc,
    SIZE_T cbSrc,
    UINT32 flags,
    _Inout_ PSYMCRYPT_ECKEY pEckey );

//
// Composite ML-KEM definitions
//

// Internal description of one Composite ML-KEM parameter set: the public parameter id, the
// EC curve and ML-KEM parameters it combines, a label, and the associated sizes/formats.
typedef struct _SYMCRYPT_COMPOSITE_MLKEM_INTERNAL_PARAMS {
    SYMCRYPT_COMPOSITE_MLKEM_PARAMS params;
    SYMCRYPT_CACHED_ECURVE_ID       ecurveId;
    SYMCRYPT_MLKEM_PARAMS           mlKemParams;
    PCBYTE                          pbLabel;        // label bytes; length is cbLabel
    SIZE_T                          cbLabel;
    SIZE_T                          cbCiphertext;
    SYMCRYPT_NUMBER_FORMAT          numFormat;
    SYMCRYPT_ECPOINT_FORMAT         ecPointFormat;
    SIZE_T                          cbExpandedSeed;
    SIZE_T                          cbEncodedPrivateKey;
    SIZE_T                          cbEncodedPublicKey;
} SYMCRYPT_COMPOSITE_MLKEM_INTERNAL_PARAMS, *PSYMCRYPT_COMPOSITE_MLKEM_INTERNAL_PARAMS;
typedef const SYMCRYPT_COMPOSITE_MLKEM_INTERNAL_PARAMS *PCSYMCRYPT_COMPOSITE_MLKEM_INTERNAL_PARAMS;

// A composite key: one ML-KEM key plus one EC key, with an optional stored private seed.
typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_COMPOSITE_MLKEMKEY {
    PCSYMCRYPT_COMPOSITE_MLKEM_INTERNAL_PARAMS pParams; // pointer to internal params for Composite ML-KEM being used
    PSYMCRYPT_MLKEMKEY pkMlKemkey;
    PSYMCRYPT_ECKEY pkEcKey;    // all composite keys with the same elliptic curve type
                                // share the same lazily allocated curve object. This
                                // avoids the overhead of setting up a new curve object per key.

    BOOLEAN hasPrivateSeed;     // presumably indicates whether privateSeed holds valid data - TODO confirm
    BYTE privateSeed[SYMCRYPT_COMPOSITE_MLKEM_IRTF_PRIVATE_SEED_SIZE];

    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_COMPOSITE_MLKEMKEY, *PSYMCRYPT_COMPOSITE_MLKEMKEY;
typedef const SYMCRYPT_COMPOSITE_MLKEMKEY *PCSYMCRYPT_COMPOSITE_MLKEMKEY;

// Rejection sampling for generating an EC scalar from an IRTF Composite ML-KEM seed
SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptCompositeMlKemGetRandomScalarForEcKeyEx(
    SYMCRYPT_CACHED_ECURVE_ID ecurveId,
    SYMCRYPT_NUMBER_FORMAT numFormat,
    _In_reads_bytes_( cbSeed ) PCBYTE pbSeed,
    SIZE_T cbSeed,
    _Out_writes_bytes_( cbScalar ) PBYTE pbScalar,
    SIZE_T cbScalar );

//
// XMSS
//

//
// ADRS structure definitions as specified in RFC 8391
//
// All fields are BYTE arrays, so the structures contain no padding; the en32/en64 prefixes
// give the field width in bits (presumably stored in encoded/big-endian form - TODO confirm).
//
typedef enum _XMSS_ADRS_TYPE
{
    XMSS_ADRS_TYPE_OTS          = 0,
    XMSS_ADRS_TYPE_LTREE        = 1,
    XMSS_ADRS_TYPE_HASH_TREE    = 2,
} XMSS_ADRS_TYPE;

typedef struct _XMSS_OTS_ADDRESS
{
    BYTE en32Leaf[4];
    BYTE en32Chain[4];
    BYTE en32Hash[4];
} XMSS_OTS_ADDRESS, *PXMSS_OTS_ADDRESS;

typedef struct _XMSS_LTREE_ADDRESS
{
    BYTE en32Leaf[4];
    BYTE en32Height[4];
    BYTE en32Index[4];
} XMSS_LTREE_ADDRESS, * PXMSS_LTREE_ADDRESS;

typedef struct _XMSS_HASHTREE_ADDRESS
{
    BYTE padding[4];
    BYTE en32Height[4];
    BYTE en32Index[4];
} XMSS_HASHTREE_ADDRESS, * PXMSS_HASHTREE_ADDRESS;

// Full address: 4 + 8 + 4 header bytes, a 12-byte type-specific union, and 4 trailing bytes,
// i.e. 32 bytes in total.
typedef struct _XMSS_ADRS
{
    BYTE en32Layer[4];
    BYTE en64Tree[8];
    BYTE en32Type[4];

    // Type-specific part; all three members are 12 bytes.
    union {
        XMSS_OTS_ADDRESS        ots;
        XMSS_LTREE_ADDRESS      ltree;
        XMSS_HASHTREE_ADDRESS   hashtree;
    } u;

    BYTE en32KeyAndMask[4];

} XMSS_ADRS, *PXMSS_ADRS;


typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_XMSS_KEY
{
    UINT32 version;

    SYMCRYPT_XMSS_PARAMS params;

    SYMCRYPT_XMSSKEY_TYPE keyType;

    // Public key
    BYTE Root[SYMCRYPT_HASH_MAX_RESULT_SIZE];
    BYTE Seed[SYMCRYPT_HASH_MAX_RESULT_SIZE];

    SYMCRYPT_MAGIC_FIELD

    // Private key
    SYMCRYPT_ALIGN_AT(16) UINT64 Idx;   // Aligning on 16-bytes to suppress clang warning
                                        // when atomic increment is performed on it.
    BYTE SkXmss[SYMCRYPT_HASH_MAX_RESULT_SIZE];
    BYTE SkPrf[SYMCRYPT_HASH_MAX_RESULT_SIZE];

} SYMCRYPT_XMSS_KEY;

typedef SYMCRYPT_XMSS_KEY* PSYMCRYPT_XMSS_KEY;


SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptXmssComputePublicRoot(
    _In_ PCSYMCRYPT_XMSS_PARAMS pParams,
    _In_reads_bytes_( cbSeed ) PCBYTE pbSeed,
    SIZE_T cbSeed,
    _In_reads_bytes_( cbSkXmss ) PCBYTE pbSkXmss,
    SIZE_T cbSkXmss,
    _Out_writes_bytes_( cbRoot ) PBYTE pbRoot,
    SIZE_T cbRoot );
//
// Compute public root value from SEED and SK_XMSS
//

SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptXmsskeyVerifyRoot(
    _In_ PCSYMCRYPT_XMSS_KEY pKey );
//
// Verifies that the public root matches the private key by recomputing it
//

SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptXmssVerifyInternal(
    _Inout_ PSYMCRYPT_XMSS_KEY pKey,
    _In_reads_bytes_( cbMessage ) PCBYTE pbMessage,
    SIZE_T cbMessage,
    UINT32 flags,
    _In_reads_bytes_( cbSignature ) PCBYTE pbSignature,
    SIZE_T cbSignature );
//
// The function that actually does the signature verification. This one doesn't
// run the self-tests so that it can be called from the self-test function.
+// + + +VOID +SYMCRYPT_CALL +SymCryptHbsGetWinternitzLengths( + UINT32 n, // data size in bytes + UINT32 w, // digit length in bits (Winternitz coefficient) + _Out_ PUINT32 puLen1, // number of w-bit digits in n + _Out_ PUINT32 puLen2 // number of w-bit digits to store the checksum len1 * (2^w - 1) + ); + +typedef struct _SYMCRYPT_TREEHASH_NODE +{ + UINT32 index; + UINT32 height; + BYTE value[SYMCRYPT_ANYSIZE_ARRAY]; +} SYMCRYPT_TREEHASH_NODE, * PSYMCRYPT_TREEHASH_NODE; + +#define SYMCRYPT_SIZEOF_TREEHASH_NODE(cbValue) (sizeof(SYMCRYPT_TREEHASH_NODE) - 1 + (cbValue)) + +#define SYMCRYPT_TREEHASH_NODE_GET(aNodes, cbValue, i) ((PSYMCRYPT_TREEHASH_NODE)((PBYTE)(aNodes) + (i) * SYMCRYPT_SIZEOF_TREEHASH_NODE(cbValue))) + + +typedef struct _SYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT +{ + PCSYMCRYPT_XMSS_PARAMS pParams; + PCBYTE pbSeed; + XMSS_ADRS adrs; + +} SYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT, * PSYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT; + + +typedef +VOID +(SYMCRYPT_CALL *PSYMCRYPT_INCREMENTAL_TREEHASH_FUNC)( + _In_ PSYMCRYPT_TREEHASH_NODE pNodeLeft, + _In_ PSYMCRYPT_TREEHASH_NODE pNodeRight, + _Out_ PSYMCRYPT_TREEHASH_NODE pNodeOut, + _Inout_ PSYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT pContext ); + + +typedef struct _SYMCRYPT_INCREMENTAL_TREEHASH +{ + UINT32 cbNode; // node size; height + hash result + UINT32 nSize; // current size of the stack + UINT32 nCapacity; // maximum items + UINT32 nLastLeafIndex; + PSYMCRYPT_INCREMENTAL_TREEHASH_FUNC funcCompressNodes; + PSYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT pContext; + + SYMCRYPT_TREEHASH_NODE arrNodes[SYMCRYPT_ANYSIZE_ARRAY]; + +} SYMCRYPT_INCREMENTAL_TREEHASH, *PSYMCRYPT_INCREMENTAL_TREEHASH; + + +PSYMCRYPT_INCREMENTAL_TREEHASH +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashInit( + UINT32 nLeaves, + PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 cbHashResult, + PSYMCRYPT_INCREMENTAL_TREEHASH_FUNC funcCompressNodes, + PSYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT pContext); + +PSYMCRYPT_TREEHASH_NODE 
+SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashGetNode( + _In_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash, + SIZE_T index ); + +PSYMCRYPT_TREEHASH_NODE +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashAllocNode( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash, + UINT32 nLeafIndex ); + +VOID +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashGetTopNodes( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash, + _Out_ PSYMCRYPT_TREEHASH_NODE *ppNodeLeft, + _Out_ PSYMCRYPT_TREEHASH_NODE *ppNodeRight ); + +PSYMCRYPT_TREEHASH_NODE +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashProcessCommon( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash, + BOOLEAN fFinal ); + +PSYMCRYPT_TREEHASH_NODE +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashProcess( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash); + +PSYMCRYPT_TREEHASH_NODE +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashFinalize( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash); + +UINT32 +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashStackDepth( + UINT32 nLeaves); + +SIZE_T +SYMCRYPT_CALL +SymCryptHbsSizeofScratchBytesForIncrementalTreehash( + UINT32 cbNode, + UINT32 nLeaves); + +UINT32 +SYMCRYPT_CALL +SymCryptHbsGetDigit( + UINT32 width, + _In_ PCBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 index); + +// +// LMS +// +#define SYMCRYPT_IS_VALID_WINTERNITZ_WIDTH(w) ( ((w) == 1) || ((w) == 2) || ((w) == 4) || ((w) == 8) ) +#define SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE 16 +#define SYMCRYPT_LMS_MAX_N 32 +#define SYMCRYPT_LMS_MAX_P 265 +#define SYMCRYPT_LMS_MAX_H 25 +#define SYMCRYPT_LMS_MAX_CUSTOM_TREE_HEIGHT 31 +#define SYMCRYPT_LMS_CHECKSUM_SIZE 16 + +// LmsAlgId || LmsOtsAlgId || I || RootNode +#define SYMCRYPT_LMS_PUB_KEY_SIZE(cbHashOutput) (8 + SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE + cbHashOutput) + +// LmsAlgId || LmsOtsAlgId || I || RootNode || NextUnusedLeaf || Seed +#define SYMCRYPT_LMS_PRIV_KEY_SIZE(cbHashOutput) (SYMCRYPT_LMS_PUB_KEY_SIZE(cbHashOutput) + sizeof(UINT32) + cbHashOutput) + 
//==========================================================================
// LMS internal structures
//==========================================================================
// In-memory LMS key. abPublicRoot and abSeed are fixed-size buffers of SYMCRYPT_LMS_MAX_N bytes.
typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_LMS_KEY{
    SIZE_T cbSize;
    SYMCRYPT_LMS_PARAMS params;

    // Leaf number of the next LM-OTS private key that has not yet been used
    UINT64 nNextUnusedLeaf;

    // The key type, can be: SYMCRYPT_LMSKEY_TYPE_PUBLIC, or SYMCRYPT_LMSKEY_TYPE_PRIVATE
    UINT32 keyType;

    // Key identifier
    BYTE abId[SYMCRYPT_LMS_KEY_PAIR_IDENTIFIER_SIZE];

    // Public key root
    BYTE abPublicRoot[SYMCRYPT_LMS_MAX_N];

    // Private key seed
    BYTE abSeed[SYMCRYPT_LMS_MAX_N];

    SYMCRYPT_MAGIC_FIELD
} SYMCRYPT_LMS_KEY;
typedef SYMCRYPT_LMS_KEY* PSYMCRYPT_LMS_KEY;
typedef const SYMCRYPT_LMS_KEY* PCSYMCRYPT_LMS_KEY;

SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptLmsVerifyInternal(
    _In_ PCSYMCRYPT_LMS_KEY pKey,
    _In_reads_bytes_(cbMessage) PCBYTE pbMessage,
    SIZE_T cbMessage,
    UINT32 flags,
    _In_reads_bytes_(cbSignature) PCBYTE pbSignature,
    SIZE_T cbSignature);
//
// This function carries out the actual LMS verification process. It's essential to prevent an infinite
// recursive call in SymCryptLmsVerifySelftest.
//


// Atomics.
//
// We define all our SymCrypt atomics below. Different compilers/environments have different
// intrinsics to handle atomics in different environments.
//
// The SymCrypt atomics take the form SYMCRYPT_ATOMIC_<Operation><Bitsize>_<Return>_<Ordering>
//
// <Operation> is the atomic operation (i.e. LOAD, OR, XOR, AND, ADD, INC, etc.)
// <Bitsize> indicates the bitsize of the values that the atomic operation operates on. Pointers to
//      values which atomics operate on must be aligned to the size of the value.
// <Return> takes the value PRE or POST, indicating whether the return value of the atomic is the
//      value of the destination before (PRE) or after (POST) the operation was performed. Not used when
//      operation is LOAD!
// <Ordering> specifies the memory ordering of the atomic operation in relation to other loads/stores
//      and can take one of the following values:
//      RELAXED corresponds to relaxed memory ordering in C++11
//      SEQ_CST corresponds to sequentially consistent memory ordering in C++11
//      ACQUIRE corresponds to acquire memory ordering in C++11
//      RELEASE corresponds to release memory ordering in C++11
//

#if SYMCRYPT_PLATFORM_WINDOWS
#include <intrin.h>

#if SYMCRYPT_CPU_ARM64
// 64b loads are naturally atomic on Arm64
#define SYMCRYPT_ATOMIC_LOAD64_RELAXED(_dest)           SYMCRYPT_FORCE_READ64(_dest)
// The _nf suffixed intrinsics are used for the RELAXED operations.
#define SYMCRYPT_ATOMIC_OR32_PRE_RELAXED(_dest, _val)   _InterlockedOr_nf( (volatile LONG *)(_dest), (LONG)(_val) )
#define SYMCRYPT_ATOMIC_ADD32_PRE_RELAXED(_dest, _val)  _InterlockedExchangeAdd_nf( (volatile LONG *)(_dest), (LONG)(_val) )
#define SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(_dest, _val) _InterlockedAdd64_nf( (volatile LONG64 *)(_dest), (LONG64)(_val) )

#define SYMCRYPT_ATOMIC_ADD32_POST_SEQ_CST(_dest, _val) _InterlockedAdd( (volatile LONG *)(_dest), (LONG)(_val) )

// Pointers are loaded/stored as 64-bit values on this target.
#define SYMCRYPT_ATOMIC_LOADPTR_ACQUIRE(_dest)          ((PVOID)__ldar64( (volatile UINT64 *)(_dest) ))
#define SYMCRYPT_ATOMIC_STOREPTR_RELEASE(_dest, _val)   __stlr64( (volatile UINT64 *)(_dest), (UINT64)(_val) )

// For ARM/ARM64, MSVC does not have a dedicated acquire-release CAS intrinsic.
+#define SYMCRYPT_ATOMIC_CAS_PTR_ACQUIRE_RELEASE( _dest, _exchange, _comp ) \ + _InterlockedCompareExchangePointer( (volatile PVOID *)(_dest), (PVOID)(_exchange), (PVOID)(_comp) ) + +#elif SYMCRYPT_CPU_ARM +#define SYMCRYPT_ATOMIC_LOAD64_RELAXED(_dest) _InterlockedOr64_nf( (volatile LONG64 *)(_dest), 0 ) +#define SYMCRYPT_ATOMIC_OR32_PRE_RELAXED(_dest, _val) _InterlockedOr_nf( (volatile LONG *)(_dest), (LONG)(_val) ) +#define SYMCRYPT_ATOMIC_ADD32_PRE_RELAXED(_dest, _val) _InterlockedExchangeAdd_nf( (volatile LONG *)(_dest), (LONG)(_val) ) +#define SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(_dest, _val) _InterlockedAdd64_nf( (volatile LONG64 *)(_dest), (LONG64)(_val) ) + +#define SYMCRYPT_ATOMIC_ADD32_POST_SEQ_CST(_dest, _val) _InterlockedAdd( (volatile LONG *)(_dest), (LONG)(_val) ) + +#define SYMCRYPT_ATOMIC_LOADPTR_ACQUIRE(_dest) ((PVOID)_InterlockedOr_acq( (volatile LONG *)(_dest), 0 )) +#define SYMCRYPT_ATOMIC_STOREPTR_RELEASE(_dest, _val) _InterlockedExchangePointer_rel( (volatile PVOID *)(_dest), (PVOID)(_val) ) + +#define SYMCRYPT_ATOMIC_CAS_PTR_ACQUIRE_RELEASE( _dest, _exchange, _comp ) \ + _InterlockedCompareExchangePointer( (volatile PVOID *)(_dest), (PVOID)(_exchange), (PVOID)(_comp) ) + +#elif SYMCRYPT_CPU_AMD64 +// For MSVC on AMD64, there are no _nf atomic intrinsics +// 64b loads are naturally atomic on AMD64 +#define SYMCRYPT_ATOMIC_LOAD64_RELAXED(_dest) SYMCRYPT_FORCE_READ64(_dest) +#define SYMCRYPT_ATOMIC_OR32_PRE_RELAXED(_dest, _val) _InterlockedOr( (volatile LONG *)(_dest), (LONG)(_val) ) +#define SYMCRYPT_ATOMIC_ADD32_PRE_RELAXED(_dest, _val) _InterlockedExchangeAdd( (volatile LONG *)(_dest), (LONG)(_val) ) +#define SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(_dest, _val) (_InterlockedExchangeAdd64( (volatile LONG64 *)(_dest), (LONG64)(_val) ) + (LONG64)(_val)) + +#define SYMCRYPT_ATOMIC_ADD32_POST_SEQ_CST(_dest, _val) (_InterlockedExchangeAdd( (volatile LONG *)(_dest), (LONG)(_val) ) + (LONG)(_val)) + +// Volatile load / store are sufficient for 
acquire-release semantics on AMD64 +#define SYMCRYPT_ATOMIC_LOADPTR_ACQUIRE(_dest) ((PVOID)SYMCRYPT_FORCE_READ64(_dest)) +#define SYMCRYPT_ATOMIC_STOREPTR_RELEASE(_dest, _val) SYMCRYPT_FORCE_WRITE64(_dest, ((UINT64)(_val))) + +#define SYMCRYPT_ATOMIC_CAS_PTR_ACQUIRE_RELEASE( _dest, _exchange, _comp ) \ + _InterlockedCompareExchangePointer( (volatile PVOID *)(_dest), (PVOID)(_exchange), (PVOID)(_comp) ) + +#elif SYMCRYPT_CPU_X86 +// For MSVC on x86, there is no 64b atomic load intrinsic - use expected to fail CAS, attempting to set from 0 to 0 +#define SYMCRYPT_ATOMIC_LOAD64_RELAXED(_dest) _InterlockedCompareExchange64( (volatile LONG64 *)(_dest), 0, 0 ) +// For MSVC on x86, there are no _nf atomic intrinsics +#define SYMCRYPT_ATOMIC_OR32_PRE_RELAXED(_dest, _val) _InterlockedOr( (volatile LONG *)(_dest), (LONG)(_val) ) +#define SYMCRYPT_ATOMIC_ADD32_PRE_RELAXED(_dest, _val) _InterlockedExchangeAdd( (volatile LONG *)(_dest), (LONG)(_val) ) +// For MSVC on x86, there is no 64b atomic add intrinsic +// We could use InterlockedAdd64 function from windows.h if we are using MSVC for Windows, but +// to remove dependency we just define our own inline function using _InterlockedCompareExchange64 +static +FORCEINLINE +LONG64 +SymCryptInlineInterlockedAdd64( volatile LONG64* destination, LONG64 value ) +{ + LONG64 preValue; + do { + preValue = *destination; + } while (_InterlockedCompareExchange64(destination, preValue + value, preValue) != preValue); + + return preValue + value; +} +#define SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(_dest, _val) SymCryptInlineInterlockedAdd64( (volatile LONG64 *)(_dest), (LONG64)(_val) ) + +#define SYMCRYPT_ATOMIC_ADD32_POST_SEQ_CST(_dest, _val) (_InterlockedExchangeAdd( (volatile LONG *)(_dest), (LONG)(_val) ) + (LONG)(_val)) + +// Volatile load / store are sufficient for acquire-release semantics on x86 +#define SYMCRYPT_ATOMIC_LOADPTR_ACQUIRE(_dest) ((PVOID)SYMCRYPT_FORCE_READ32(_dest)) +#define SYMCRYPT_ATOMIC_STOREPTR_RELEASE(_dest, _val) 
SYMCRYPT_FORCE_WRITE32(_dest, ((UINT32)(_val))) + +#define SYMCRYPT_ATOMIC_CAS_PTR_ACQUIRE_RELEASE( _dest, _exchange, _comp ) \ + _InterlockedCompareExchangePointer( (volatile PVOID *)(_dest), (PVOID)(_exchange), (PVOID)(_comp) ) + +#else + +// Fallback intended to generically work across all supported platforms for cases where +// we do not make decisions based on CPU architecture, such as no ASM builds. For the most +// part the same as x86 except in cases where the underlying definition relies on pointer size. + +#define SYMCRYPT_ATOMIC_LOAD64_RELAXED(_dest) _InterlockedCompareExchange64( (volatile LONG64 *)(_dest), 0, 0 ) +#define SYMCRYPT_ATOMIC_OR32_PRE_RELAXED(_dest, _val) _InterlockedOr( (volatile LONG *)(_dest), (LONG)(_val) ) +#define SYMCRYPT_ATOMIC_ADD32_PRE_RELAXED(_dest, _val) _InterlockedExchangeAdd( (volatile LONG *)(_dest), (LONG)(_val) ) + +FORCEINLINE +LONG64 +SymCryptInlineInterlockedAdd64( volatile LONG64* destination, LONG64 value ) +{ + LONG64 preValue; + do { + preValue = *destination; + } while (_InterlockedCompareExchange64(destination, preValue + value, preValue) != preValue); + + return preValue + value; +} +#define SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(_dest, _val) SymCryptInlineInterlockedAdd64( (volatile LONG64 *)(_dest), (LONG64)(_val) ) + +#define SYMCRYPT_ATOMIC_ADD32_POST_SEQ_CST(_dest, _val) (_InterlockedExchangeAdd( (volatile LONG *)(_dest), (LONG)(_val) ) + (LONG)(_val)) + +#if defined(_WIN64) +#define SYMCRYPT_ATOMIC_LOADPTR_ACQUIRE(_dest) ((PVOID)_InterlockedOr64( (volatile LONG64 *)(_dest), 0 )) +#else +#define SYMCRYPT_ATOMIC_LOADPTR_ACQUIRE(_dest) ((PVOID)_InterlockedOr( (volatile LONG *)(_dest), 0 )) +#endif + +#define SYMCRYPT_ATOMIC_STOREPTR_RELEASE(_dest, _val) _InterlockedExchangePointer( (volatile PVOID *)(_dest), (PVOID)(_val) ) + +#define SYMCRYPT_ATOMIC_CAS_PTR_ACQUIRE_RELEASE( _dest, _exchange, _comp ) \ + _InterlockedCompareExchangePointer( (volatile PVOID *)(_dest), (PVOID)(_exchange), (PVOID)(_comp) ) + +#endif + 
+#elif SYMCRYPT_GNUC +#define SYMCRYPT_ATOMIC_LOAD64_RELAXED(_dest) __atomic_load_n( (volatile uint64_t *)(_dest), __ATOMIC_RELAXED ) +#define SYMCRYPT_ATOMIC_OR32_PRE_RELAXED(_dest, _val) __atomic_fetch_or( (volatile uint32_t *)(_dest), (uint32_t)(_val), __ATOMIC_RELAXED ) +#define SYMCRYPT_ATOMIC_ADD32_PRE_RELAXED(_dest, _val) __atomic_fetch_add( (volatile uint32_t *)(_dest), (uint32_t)(_val), __ATOMIC_RELAXED ) +#define SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(_dest, _val) __atomic_add_fetch( (volatile uint64_t *)(_dest), (uint64_t)(_val), __ATOMIC_RELAXED ) + +#define SYMCRYPT_ATOMIC_ADD32_POST_SEQ_CST(_dest, _val) __atomic_add_fetch( (volatile uint32_t *)(_dest), (uint32_t)(_val), __ATOMIC_ACQ_REL ) + +#define SYMCRYPT_ATOMIC_LOADPTR_ACQUIRE(_dest) __atomic_load_n( (volatile void* *)(_dest), __ATOMIC_ACQUIRE ) +#define SYMCRYPT_ATOMIC_STOREPTR_RELEASE(_dest, _val) __atomic_store_n( (volatile void* *)(_dest), (void*)(_val), __ATOMIC_RELEASE ) + +static +FORCEINLINE +void* +SymCryptAtomicCasPtrAcqRel( + void** dest, + void* desired, + void* expected) +{ + __atomic_compare_exchange_n( + dest, // ptr + &expected, + desired, + FALSE, // weak (set to FALSE => strong) + __ATOMIC_RELEASE, // success_memorder + __ATOMIC_ACQUIRE ); // failure_memorder + return expected; +} + +#define SYMCRYPT_ATOMIC_CAS_PTR_ACQUIRE_RELEASE( _dest, _exchange, _comp ) \ + SymCryptAtomicCasPtrAcqRel( (volatile void **)(_dest), (void *)(_exchange), (void *)(_comp) ) + +#endif + +// Inline CAS-128 functions + +// BOOLEAN +// SymCryptAtomicCas128Relaxed( +// _Inout_updates_(2) PUINT64 destination, +// _Inout_updates_(2) PUINT64 expectedValue, +// _In_reads_(2) PCUINT64 desiredValue); +// Performs Compare-and-Swap on a 128b memory location. 
// Atomically reads destination, compares with expectedValue, and:
//     if they are equal, writes desiredValue to destination, and return TRUE
//     if they are not equal, writes the value read from destination to expectedValue, and returns FALSE
//
// Remarks:
// On success, the value of expectedValue is not guaranteed.
// Only destination is guaranteed to be read and written atomically, expectedValue should be a buffer
// which is only owned by the calling thread.
// destination must be aligned to 16 bytes
//

// The 128-bit CAS is only provided for AMD64 and ARM64 targets.
#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64

#if SYMCRYPT_PLATFORM_WINDOWS

// Select the MSVC 128-bit CAS intrinsic: the _nf variant on ARM64, the plain one on AMD64.
#if SYMCRYPT_CPU_ARM64
#define SYMCRYPT_MSVC_CAS128_NF _InterlockedCompareExchange128_nf
#elif SYMCRYPT_CPU_AMD64
#define SYMCRYPT_MSVC_CAS128_NF _InterlockedCompareExchange128
#endif

// MSVC implementation. Element [1] of each 2-element array is passed as the high 64 bits and
// element [0] as the low 64 bits.
static
FORCEINLINE
BOOLEAN
SymCryptAtomicCas128Relaxed(
    _Inout_updates_(2) PUINT64 destination,
    _Inout_updates_(2) PUINT64 expectedValue,
    _In_reads_(2) PCUINT64 desiredValue)
{
    return SYMCRYPT_MSVC_CAS128_NF(
        (volatile LONG64 *)destination,
        (LONG64)desiredValue[1],
        (LONG64)desiredValue[0],
        (LONG64 *) expectedValue );
}

#elif SYMCRYPT_GNUC

// GCC/Clang implementation: inline cmpxchg16b on AMD64, compiler builtin on ARM64.
static
FORCEINLINE
BOOLEAN
SymCryptAtomicCas128Relaxed(
    _Inout_updates_(2) PUINT64 destination,
    _Inout_updates_(2) PUINT64 expectedValue,
    _In_reads_(2) PCUINT64 desiredValue)
{
#if SYMCRYPT_CPU_AMD64
    // To avoid dynamically linking libatomic in OpenEnclave, use inline assembly for cmpxchg16b
    // on AMD64. We always need to perform CPU feature detection before we hit this function.
    BOOLEAN result;
    // Register constraints: expected value in RDX:RAX (high:low), desired value in RCX:RBX
    // (high:low). sete captures the zero flag, which cmpxchg16b sets when the exchange happened.
    __asm__ __volatile__
    (
        "lock cmpxchg16b %1\n\t"
        "sete %0"
        : "=r" (result)
        , "+m" (*destination)
        , "+d" (expectedValue[1])
        , "+a" (expectedValue[0])
        : "c" (desiredValue[1])
        , "b" (desiredValue[0])
        : "cc"
    );
    return result;
#elif SYMCRYPT_CPU_ARM64
    // clang inlines this but GCC dynamically links to libatomic
    // For now, just let the compiler decide, and for ARM64 modules, always allow linking to libatomic
    // We may want to break out into inline asm for LDXP/STXP implementation (v8.0) vs. CASP
    // implementation (v8.1) in future
    return __atomic_compare_exchange(
        (__int128 *)destination,    // ptr
        (__int128 *)expectedValue,  // expected
        (__int128 *)desiredValue,   // desired
        FALSE,                      // weak (set to FALSE => strong)
        __ATOMIC_RELAXED,           // success_memorder
        __ATOMIC_RELAXED);          // failure_memorder
#endif
}

#endif

#endif

// Returns the number of consecutive zero bits starting at the least significant bit of value.
// Returns 32 when value is 0.
static
FORCEINLINE
UINT32
SymCryptCountTrailingZeros32( UINT32 value )
{
    unsigned long index = 0;
    // Handle 0 up front: the intrinsic/builtin below does not produce a defined result for 0,
    // and the generic loop would never terminate.
    if( value == 0 )
    {
        return 32;
    }

#if SYMCRYPT_PLATFORM_WINDOWS && (SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM)
    _BitScanForward(&index, value);
#elif SYMCRYPT_GNUC
    index = __builtin_ctz(value);
#else
    // Portable fallback: shift right until the lowest bit is set
    while( (value & 1) == 0 )
    {
        index++;
        value >>= 1;
    }
#endif

    return (UINT32)index;
}

// Returns the number of consecutive zero bits starting at the least significant bit of value.
// Returns 64 when value is 0.
static
FORCEINLINE
UINT32
SymCryptCountTrailingZeros64( UINT64 value )
{
    unsigned long index = 0;
    // Handle 0 up front, for the same reasons as in the 32-bit variant.
    if( value == 0 )
    {
        return 64;
    }

#if SYMCRYPT_PLATFORM_WINDOWS && (SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64)
    _BitScanForward64(&index, value);
#elif SYMCRYPT_PLATFORM_WINDOWS && (SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM)
    // No 64-bit scan here: scan the low half, or the high half (plus 32) when the low half is 0
    if( ((UINT32)value) == 0 )
    {
        _BitScanForward(&index, (UINT32)(value>>32));
        index += 32;
    } else {
        _BitScanForward(&index, (UINT32)value);
    }

#elif SYMCRYPT_GNUC
    index = __builtin_ctzll(value);
#else
    // Portable fallback: shift right until the lowest bit is set
    while( (value & 1) == 0 )
    {
        index++;
        value >>= 1;
    }
#endif

    return (UINT32)index;
}

+static +FORCEINLINE +UINT32 +SymCryptCountLeadingZeros32( UINT32 value ) +{ + unsigned long zeros = 0; + + if(value == 0) + { + return 32; + } + +#if SYMCRYPT_PLATFORM_WINDOWS && (SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM) + _BitScanReverse(&zeros, value); + zeros = 31 - zeros; +#elif SYMCRYPT_GNUC + zeros = __builtin_clz(value); +#else + while( (value & 0x80000000) == 0 ) + { + zeros++; + value <<= 1; + } +#endif + + return (UINT32)zeros; +} + +static +FORCEINLINE +UINT32 +SymCryptCountLeadingZeros64( UINT64 value ) +{ + unsigned long zeros = 0; + + if(value == 0) + { + return 64; + } + +#if SYMCRYPT_PLATFORM_WINDOWS && (SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_ARM64) + _BitScanReverse64(&zeros, value); + zeros = 63 - zeros; +#elif SYMCRYPT_PLATFORM_WINDOWS && (SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM) + if( (value >> 32) == 0 ) + { + _BitScanReverse(&zeros, (UINT32)value); + zeros = 63 - zeros; + } else { + _BitScanReverse(&zeros, (UINT32)(value >> 32)); + zeros = 31 - zeros; + } +#elif SYMCRYPT_GNUC + zeros = __builtin_clzll(value); +#else + while( (value & 0x8000000000000000) == 0 ) + { + zeros++; + value <<= 1; + } +#endif + + return (UINT32)zeros; +} diff --git a/libs/symcrypt/lib/sc_lib_mldsa.h b/libs/symcrypt/lib/sc_lib_mldsa.h new file mode 100644 index 00000000000..54dafbabeaa --- /dev/null +++ b/libs/symcrypt/lib/sc_lib_mldsa.h @@ -0,0 +1,1081 @@ +// +// sc_lib_mldsa.h +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// Internal ML-DSA definitions for the symcrypt library. +// Always intended to be included as part of sc_lib.h +// + +// +// Modulus for ML-DSA +// +#define SYMCRYPT_MLDSA_Q (8380417) + +// +// Montgomery multiplier for ML-DSA, log 2 (i.e. 
R = 2^32) +// +#define SYMCRYPT_MLDSA_R_LOG2 (32) + +// +// Size of the root seed xi used in key generation +// +#define SYMCRYPT_MLDSA_ROOT_SEED_SIZE (32) + +// +// Size of the public seed rho +// +#define SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE (32) + +// +// Size of public key hash (tr) = SHAKE256 result size = 64 bytes +// +#define SYMCRYPT_MLDSA_PUBLIC_KEY_HASH_SIZE SYMCRYPT_SHAKE256_RESULT_SIZE + +// +// Size of private signing seed K +// +#define SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE (32) + +// +// Size of the private vector seed rho prime +// +#define SYMCRYPT_MLDSA_PRIVATE_VECTOR_SEED_SIZE (64) + +// +// Size of random value used in signing (rnd in FIPS 204) +// +#define SYMCRYPT_MLDSA_SIGNING_RANDOM_SIZE (32) + +// +// Length of hash algorithm OIDs in bytes. Currently all supported hash algorithms have 11-byte +// OIDs, but this is not guaranteed to be the case as more algorithms are added in the future. +// If the OID length becomes variable, functions which use this value will need to be changed. +// +#define SYMCRYPT_MLDSA_SUPPORTED_HASH_OID_SIZE (11) + +// +// Flag for Sign and Verify with External Mu +// +#define SYMCRYPT_FLAG_MLDSA_EXTERNALMU (0x1) + +typedef struct _SYMCRYPT_MLDSA_POLYELEMENT { + // PolyElements just store the coefficients without any header. 
+ UINT32 coeffs[SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS]; +} SYMCRYPT_MLDSA_POLYELEMENT, * PSYMCRYPT_MLDSA_POLYELEMENT; +typedef const SYMCRYPT_MLDSA_POLYELEMENT* PCSYMCRYPT_MLDSA_POLYELEMENT; + +// Maximum number of rows and columns in A matrix for ML-DSA +#define SYMCRYPT_MLDSA_VECTOR_MAX_LENGTH (8) +#define SYMCRYPT_MLDSA_MATRIX_MAX_NROWS (8) +#define SYMCRYPT_MLDSA_MATRIX_MAX_NCOLS (7) + +typedef _Struct_size_bytes_( cbTotalSize ) struct _SYMCRYPT_MLDSA_VECTOR { + _Field_range_( 1, SYMCRYPT_MLDSA_VECTOR_MAX_LENGTH ) + UINT8 nElems; // Number of PolyElements in the vector + UINT32 cbTotalSize; // Total size of the Vector + + // Followed by: + // nElems PolyElements +} SYMCRYPT_MLDSA_VECTOR, * PSYMCRYPT_MLDSA_VECTOR; +typedef const SYMCRYPT_MLDSA_VECTOR* PCSYMCRYPT_MLDSA_VECTOR; + +typedef _Struct_size_bytes_( cbTotalSize ) struct _SYMCRYPT_MLDSA_MATRIX { + _Field_range_( 1, SYMCRYPT_MLDSA_MATRIX_MAX_NROWS ) + UINT8 nRows; // k in FIPS-204 + _Field_range_( 1, SYMCRYPT_MLDSA_MATRIX_MAX_NCOLS ) + UINT8 nCols; // l in FIPS-204 + UINT32 cbTotalSize; // Total size of the Matrix + + // Followed by: + // nRows*nCols PolyElements in row-major order +} SYMCRYPT_MLDSA_MATRIX, * PSYMCRYPT_MLDSA_MATRIX; +typedef const SYMCRYPT_MLDSA_MATRIX* PCSYMCRYPT_MLDSA_MATRIX; + +typedef struct _SYMCRYPT_MLDSA_INTERNAL_PARAMS { + UINT32 params; // parameter set of ML-DSA being used - takes a value from SYMCRYPT_MLDSA_PARAMS + + UINT32 cbPolyElement; // size in bytes of one polynomial ring element + UINT32 cbRowVector; // size in bytes of one row vector (k elements) + UINT32 cbColVector; // size in bytes of one column vector (l elements) + UINT32 cbMatrix; // size in bytes of one matrix + + UINT8 nRows; // Number of rows in the A matrix (k in FIPS-204) + UINT8 nCols; // Number of columns in the A matrix (l in FIPS-204) + + UINT8 privateKeyRange; // Coefficient range of s1, s2 private key vectors (eta in FIPS-204) + UINT8 encodedCoefficientBitLength; // Bit length of encoded private key 
coefficients + + UINT8 nChallengeNonZeroCoeffs; // Number of non-zero coefficients in the challenge polynomial (tau in FIPS-204) + UINT8 nHintNonZeroCoeffs; // Max number of non-zero coefficients in the hint polynomial (omega in FIPS-204) + UINT8 maskCoefficientRangeLog2; // Coefficient range of mask polynomial y (log_2(gamma_1) in FIPS-204) + UINT8 commitmentModulus; // Modulus for commitment values in UseHint and MakeHint (q-1)/(2*gamma_2) + UINT32 decomposeR1Factor; // Multiplication factor for R1 in SymCryptMlDsaDecompose - see function comments + UINT32 commitmentRoundingRange; // Rounding range for commitment value (gamma_2 in FIPS-204) + UINT32 w1EncodeCoefficientBitLength; // Bit length of coefficients for w1 encoding (q - 1) / ((2 * gamma_2) - 1)) + + UINT32 cbCommitmentHash; // Size of the commitment hash (lambda / 4 in FIPS 204) + UINT32 cbEncodedPrivateKey; // Size of the encoded private key + UINT32 cbEncodedPublicKey; // Size of the encoded public key + UINT32 cbEncodedSignature; // Size of the encoded signature +} SYMCRYPT_MLDSA_INTERNAL_PARAMS, * PSYMCRYPT_MLDSA_INTERNAL_PARAMS; +typedef const SYMCRYPT_MLDSA_INTERNAL_PARAMS* PCSYMCRYPT_MLDSA_INTERNAL_PARAMS; + +typedef _Struct_size_bytes_( cbTotalSize ) struct _SYMCRYPT_MLDSAKEY { + UINT32 fAlgorithmInfo; // Tracks which algorithms the key can be used in (not currently used) + // Also tracks which per-key selftests have been performed on this key + // A bitwise OR of SYMCRYPT_FLAG_KEY_*, SYMCRYPT_FLAG_MLDSAKEY_*, and + // SYMCRYPT_SELFTEST_KEY_* values + + PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams; + + UINT32 cbTotalSize; // Total in-memory size of the ML-DSA key (this header and the following structs) + + BOOLEAN hasRootSeed; // True if the key has the seed used in key generation (xi) + BOOLEAN hasPrivateKey; // True if the key has private vectors s1, s2, t0 + + // Seeds + _When_( hasRootSeed, _Field_size_bytes_(SYMCRYPT_MLDSA_ROOT_SEED_SIZE) ) + _When_( !hasRootSeed, 
_Field_size_bytes_part_(SYMCRYPT_MLDSA_ROOT_SEED_SIZE, 0) ) + BYTE rootSeed[SYMCRYPT_MLDSA_ROOT_SEED_SIZE]; // Root seed used in key generation (xi) - only available for keys generated by SymCrypt, or imported from a seed + + _When_( hasPrivateKey, _Field_size_bytes_(SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE) ) + _When_( !hasPrivateKey, _Field_size_bytes_part_(SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE, 0) ) + BYTE privateSigningSeed[SYMCRYPT_MLDSA_PRIVATE_SIGNING_SEED_SIZE]; // Private seed used in signing (K) + + BYTE publicSeed[SYMCRYPT_MLDSA_PUBLIC_SEED_SIZE]; // Public seed from which A can be derived (rho) + BYTE publicKeyHash[SYMCRYPT_MLDSA_PUBLIC_KEY_HASH_SIZE]; // SHAKE-256 hash of the public key + + // + // ML-DSA matrix/vector components: A * s1 + s2 = t + // + // t is separated into two components, t0 and t1, using Power2Round. t0 is private and is used + // during signing; t1 is public and is used during verification. All components are stored in + // NTT form so that we do not need to convert them during signing or verification. 
+ // + + // Public components - always valid + PSYMCRYPT_MLDSA_MATRIX pmA; // Public matrix A - size nRows x nCols + PSYMCRYPT_MLDSA_VECTOR pvt1; // Public component of t vector from Power2Round (row vector) + + // Private components - only valid when hasPrivateKey is TRUE + PSYMCRYPT_MLDSA_VECTOR pvs1; // Private vector s1 (column vector) + PSYMCRYPT_MLDSA_VECTOR pvs2; // Private vector s2 (row vector) + PSYMCRYPT_MLDSA_VECTOR pvt0; // Private component of t vector from Power2Round (row vector) + + SYMCRYPT_MAGIC_FIELD + // Followed by: + // A + // t1 + // s1 + // s2 + // t0 +} SYMCRYPT_MLDSAKEY, * PSYMCRYPT_MLDSAKEY; +typedef const SYMCRYPT_MLDSAKEY* PCSYMCRYPT_MLDSAKEY; + +typedef _Struct_size_bytes_(cbTotalSize) struct _SYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES +{ + UINT32 cbTotalSize; // Total in-memory size of this structure + UINT32 nRowVectors; // Number of row vectors + UINT32 nColVectors; // Number of column vectors + UINT32 nPolyElements; // Number of PolyElements + UINT32 cbScratch; // Size of scratch buffer + + + SYMCRYPT_SHAKE256_STATE shake256State; + + _Field_size_( nRowVectors ) + PSYMCRYPT_MLDSA_VECTOR* pvRowVectors; // Array of pointers to row vectors + _Field_size_( nColVectors ) + PSYMCRYPT_MLDSA_VECTOR* pvColVectors; // Array of pointers to column vectors + _Field_size_( nPolyElements) + PSYMCRYPT_MLDSA_POLYELEMENT* pePolyElements; // Array of pointers to PolyElements + + _Field_size_bytes_( cbScratch ) + PBYTE pbScratch; + + SYMCRYPT_MAGIC_FIELD + // Followed by: + // pvRowVectors[0..nRowVectors-1] + // pvColVectors[0..nColVectors-1] + // pePolyElements[0..nPolyElements-1] + // nRowVectors * SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR( nRows ) buffer for row vectors + // nColVectors * SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR( nCols ) buffer for column vectors + // nPoly * SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT buffer for PolyElements + // cbScratch bytes of scratch space +} SYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES, * 
PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES; + +#define SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT ( sizeof( SYMCRYPT_MLDSA_POLYELEMENT ) ) +#define SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR( _nElems ) ( sizeof( SYMCRYPT_MLDSA_VECTOR ) + ( _nElems * sizeof( SYMCRYPT_MLDSA_POLYELEMENT ) ) ) +#define SYMCRYPT_INTERNAL_MLDSA_SIZEOF_MATRIX( _nRows, _nCols ) ( sizeof( SYMCRYPT_MLDSA_MATRIX ) + ( _nRows * _nCols * sizeof( SYMCRYPT_MLDSA_POLYELEMENT ) ) ) +#define SYMCRYPT_INTERNAL_MLDSA_SIZEOF_KEY( _nRows, _nCols ) ( sizeof( SYMCRYPT_MLDSAKEY) + \ + SYMCRYPT_INTERNAL_MLDSA_SIZEOF_MATRIX( _nRows, _nCols ) + \ + SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR( _nCols ) + \ + (SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR( _nRows ) * 3u) ) + +#define SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT_OFFSET( _row ) ( sizeof( SYMCRYPT_MLDSA_VECTOR ) + (_row * SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT) ) +#define SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT( _row, _pVector ) ((PSYMCRYPT_MLDSA_POLYELEMENT) ( ((PBYTE) (_pVector)) + SYMCRYPT_INTERNAL_MLDSA_VECTOR_ELEMENT_OFFSET( _row ) )) +#define SYMCRYPT_INTERNAL_MLDSA_MATRIX_ELEMENT_OFFSET( _row, _col, _pMatrix ) ( sizeof( SYMCRYPT_MLDSA_MATRIX ) + ((_row * (_pMatrix)->nCols + _col) * SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT) ) +#define SYMCRYPT_INTERNAL_MLDSA_MATRIX_ELEMENT( _row, _col, _pMatrix) ((PSYMCRYPT_MLDSA_POLYELEMENT) ( ((PBYTE) (_pMatrix)) + SYMCRYPT_INTERNAL_MLDSA_MATRIX_ELEMENT_OFFSET( _row, _col, _pMatrix ) )) + +#define SYMCRYPT_INTERNAL_MLDSA_SIZEOF_ENCODED_VECTOR( _pVector, _nBitsPerCoeff ) ( ((_pVector)->nElems * SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS * (_nBitsPerCoeff) ) / 8u ) + +// For packing signed coefficients into the minimum possible number of bits for encoding, ML-DSA +// converts them to (signed upper bound - x) for each coefficient x. 
For example, when encoding +// s1 and s2 which have coefficients in the range [-eta, eta] with ML-DSA-65 (eta = 4), 1 is encoded +// as (4 - 1) = 3, 0 is encoded as (4 - 0) = 4, -1 is encoded as (4 - (-1)) = 5, etc. Conveniently, +// this also works in reverse to decode the coefficients. +#define SYMCRYPT_INTERNAL_MLDSA_SHORT_COEFFICIENT_ENCODE_DECODE( _val, _bound) ( _bound - _val ) + +////////////////////////////////////////////////////////////////////////// +// Internal implementations of public APIs +////////////////////////////////////////////////////////////////////////// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaKeyGenerateEx( + _Inout_ PSYMCRYPT_MLDSAKEY pkMlDsakey, + _In_reads_( cbRootSeed ) PCBYTE pbRootSeed, + SIZE_T cbRootSeed, + UINT32 flags ); +// +// Implements SymCryptMlDsakeyGenerate. Takes a seed from the caller so that keys can be generated +// deterministically for testing. +// +// Parameters: +// - (pbRootSeed, cbRootSeed): The seed used to generate the key (xi in FIPS 204) +// +// See SymCryptMlDsakeyGenerate for additional documentation. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSignEx( + _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey, + _In_reads_( cbInput ) PCBYTE pbInput, + SIZE_T cbInput, + _In_reads_opt_( cbContext ) PCBYTE pbContext, + _In_range_( 0, SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH ) SIZE_T cbContext, + _In_reads_opt_( cbHashOid ) PCBYTE pbHashOid, + SIZE_T cbHashOid, + _In_reads_( cbRandom ) PCBYTE pbRandom, + SIZE_T cbRandom, + UINT32 flags, + _Out_writes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ); +// +// Implements SymCryptMlDsaSign, SymCryptExternalMuMlDsaSign, and SymCryptHashMlDsaSign. +// Takes the random value from the caller so that signing can be done deterministically for testing. +// +// Parameters: +// - (pbInput, cbInput): The message to be signed. For SymCryptMlDsaSign, this is the full message. +// For SymCryptHashMlDsaSign, this is the hash of the message. 
//  - (pbContext, cbContext): An optional context string which will be prepended to the message.
//  - (pbHashOid, cbHashOid): The DER-encoded OID of the hash algorithm used to hash the message,
//    when using SymCryptHashMlDsaSign. Must be NULL for SymCryptMlDsaSign.
//  - (pbRandom, cbRandom): The random value used in the signing process (rnd in FIPS 204).
//  - flags: 0 or SYMCRYPT_FLAG_MLDSA_EXTERNALMU.
//  - (pbSignature, cbSignature): Output buffer that receives the signature.
//

SYMCRYPT_ERROR
SYMCRYPT_CALL
SymCryptMlDsaVerifyEx(
    _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey,
    _In_reads_( cbInput ) PCBYTE pbInput,
    SIZE_T cbInput,
    _In_reads_opt_( cbContext ) PCBYTE pbContext,
    _In_range_( 0, SYMCRYPT_MLDSA_CONTEXT_MAX_LENGTH ) SIZE_T cbContext,
    _In_reads_opt_( cbHashOid ) PCBYTE pbHashOid,
    SIZE_T cbHashOid,
    _In_reads_( cbSignature ) PCBYTE pbSignature,
    SIZE_T cbSignature,
    UINT32 flags );
//
// Implements SymCryptMlDsaVerify, SymCryptExternalMuMlDsaVerify, and SymCryptHashMlDsaVerify.
//
// Parameters:
//  - (pbInput, cbInput): The message to be verified. For SymCryptMlDsaVerify, this is the full
//    message. For SymCryptHashMlDsaVerify, this is the hash of the message.
//  - (pbContext, cbContext): An optional context string which will be prepended to the message.
//  - (pbHashOid, cbHashOid): The DER-encoded OID of the hash algorithm used to hash the message,
//    when using SymCryptHashMlDsaVerify. Must be NULL for SymCryptMlDsaVerify.
//  - (pbSignature, cbSignature): The signature to be verified.
//  - flags: 0 or SYMCRYPT_FLAG_MLDSA_EXTERNALMU.
//

_Success_( TRUE )
PSYMCRYPT_MLDSAKEY
SYMCRYPT_CALL
SymCryptMlDsakeyInitialize(
    _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pInternalParams,
    _Out_writes_bytes_(cbKey) PBYTE pbKey,
    UINT32 cbKey );
//
// Initializes a SYMCRYPT_MLDSAKEY structure in the given buffer. The buffer size (cbKey) must
// be exactly equal to the size of the key structure, which can be calculated using
// SYMCRYPT_INTERNAL_MLDSA_SIZEOF_KEY.
//
// Parameters:
//  - pInternalParams: Parameter set to use for the key.
//  - (pbKey, cbKey): Buffer for the key structure.
//

VOID
SYMCRYPT_CALL
SymCryptMlDsakeyComputeT(
    _In_ PCSYMCRYPT_MLDSA_MATRIX pmA,
    _In_ PCSYMCRYPT_MLDSA_VECTOR pvs1,
    _In_ PCSYMCRYPT_MLDSA_VECTOR pvs2,
    _Inout_ PSYMCRYPT_MLDSA_VECTOR pvt0,
    _Inout_ PSYMCRYPT_MLDSA_VECTOR pvt1,
    _Inout_ PSYMCRYPT_MLDSA_VECTOR pvTmp,
    _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peTmp );
//
// Helper function for computing the t vector in ML-DSA: A * s1 + s2 = t. Used by key generation
// and private key import. All inputs must be in NTT form. The outputs t0 and t1 are NOT returned
// in NTT form; it is the caller's responsibility to convert them when appropriate.
//
// Parameters:
//  - pmA: Public matrix A
//  - pvs1: Private vector s1.
//  - pvs2: Private vector s2.
//  - pvt0: Output. Private component of t vector from Power2Round.
//  - pvt1: Output. Public component of t vector from Power2Round.
//  - pvTmp: Temporary vector for intermediate computations.
//  - peTmp: Temporary PolyElement for intermediate computations.
//

//////////////////////////////////////////////////////////////////////////
// Montgomery reduction and multiplication
//////////////////////////////////////////////////////////////////////////

UINT32
SYMCRYPT_CALL
SymCryptMlDsaMontReduce( UINT64 a );
//
// Montgomery reduction
//  res = a * R^-1 mod Q.
//
// Note that this divides out a factor of R.
//

UINT32
SYMCRYPT_CALL
SymCryptMlDsaMontMul( UINT32 a, UINT32 b );
//
// Montgomery multiplication
//  res = (a * b) / R mod Q
//
// Equivalent to SymCryptMlDsaMontReduce( (UINT64) a * b )
// As above, this divides out a factor of R, which can be compensated for in either input,
// or taken into account in the output.
//

//////////////////////////////////////////////////////////////////////////
// 32-bit modular arithmetic
//////////////////////////////////////////////////////////////////////////

UINT32
SYMCRYPT_CALL
SymCryptMlDsaModAdd( UINT32 a, UINT32 b );
//
// res := a + b mod Q
//
// Requirements: a < Q, b < Q
//

UINT32
SYMCRYPT_CALL
SymCryptMlDsaModSub( UINT32 a, UINT32 b );
//
// res := a - b mod Q
//
// Requirements: a < Q, b < Q
//

//////////////////////////////////////////////////////////////////////////
// Polynomial operations
//////////////////////////////////////////////////////////////////////////

_Success_( TRUE )
PSYMCRYPT_MLDSA_POLYELEMENT
SYMCRYPT_CALL
SymCryptMlDsaPolyElementCreate(
    _Inout_updates_( cbBuffer ) PBYTE pbBuffer,
    SIZE_T cbBuffer );
//
// Initializes a SYMCRYPT_MLDSA_POLYELEMENT in the given buffer.
// cbBuffer must be equal to SYMCRYPT_INTERNAL_MLDSA_SIZEOF_POLYELEMENT.
//

VOID
SYMCRYPT_CALL
SymCryptMlDsaPolyElementSetZero(
    _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peDst );
//
// Sets all coefficients to zero
//

VOID
SYMCRYPT_CALL
SymCryptMlDsaPolyElementNTT(
    _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peSrc );
//
// ML-DSA Polynomial Ring Element NTT (in place):
//  peSrc = NTT(peSrc) per FIPS 204
//

VOID
SYMCRYPT_CALL
SymCryptMlDsaPolyElementINTT(
    _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peSrc );
//
// ML-DSA Polynomial Ring Element inverse NTT (in place):
//  peSrc = InverseNTT(peSrc) per FIPS 204
//

VOID
SYMCRYPT_CALL
SymCryptMlDsaPolyElementMulR(
    _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peSrc );
//
// ML-DSA Polynomial multiplication by the Montgomery multiplier R (in place):
//  peSrc = (peSrc * R) mod Q
//

VOID
SYMCRYPT_CALL
SymCryptMlDsaPolyElementMontMul(
    _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc1,
    _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc2,
    _Out_ PSYMCRYPT_MLDSA_POLYELEMENT peDst );
//
// ML-DSA Polynomial Montgomery multiplication:
//  peDst = (peSrc1 * peSrc2) ./ R
// where:
//  * is polynomial multiplication given sources in NTT form
//  ./ is coefficient-wise division and R is the Montgomery multiplier
//
// Requirements:
//  - peSrc1 and peSrc2 must be PolyElements in ML-DSA NTT form
//

VOID
SYMCRYPT_CALL
SymCryptMlDsaPolyElementAdd(
    _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc1,
    _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc2,
    _Out_ PSYMCRYPT_MLDSA_POLYELEMENT peDst );
//
// ML-DSA Polynomial Ring Element addition
//  peDst = peSrc1 + peSrc2
//

VOID
SYMCRYPT_CALL
SymCryptMlDsaPolyElementSub(
    _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc1,
    _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc2,
    _Out_ PSYMCRYPT_MLDSA_POLYELEMENT peDst );
//
// ML-DSA Polynomial Ring Element subtraction
//  peDst = peSrc1 - peSrc2
//

//////////////////////////////////////////////////////////////////////////
// Vector operations
//////////////////////////////////////////////////////////////////////////

_Success_( TRUE )
PSYMCRYPT_MLDSA_VECTOR
SYMCRYPT_CALL
SymCryptMlDsaVectorCreate(
    _Out_writes_( cbBuffer ) PBYTE pbBuffer,
    UINT32 cbBuffer,
    UINT8 nElems );
//
// Initializes a vector of nElems PolyElements in the given buffer.
// cbBuffer must be equal to SYMCRYPT_INTERNAL_MLDSA_SIZEOF_VECTOR( nElems ).
//

VOID
SYMCRYPT_CALL
SymCryptMlDsaVectorCopy(
    _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc,
    _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst );
//
// pvDst = pvSrc. Vectors must be the same size.
//

VOID
SYMCRYPT_CALL
SymCryptMlDsaVectorSetZero(
    _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst );
//
// Sets all elements of the vector to zero.
//

VOID
SYMCRYPT_CALL
SymCryptMlDsaVectorAdd(
    _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc1,
    _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc2,
    _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst );
//
// pvDst = pvSrc1 + pvSrc2
//
// Requirements: pvSrc1, pvSrc2, and pvDst must all have the same number of elements.
+// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorSub( + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc1, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc2, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst ); +// +// pvDst = pvSrc1 - pvSrc2 +// +// Requirements: pvSrc1, pvSrc2, and pvDst must all have the same number of elements. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorPolyElementMontMul( + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc1, + _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc2, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst ); +// +// ML-DSA Vector-PolyElement Montgomery Multiplication: +// pvDst[i] = (pvSrc1[i] * peSrc2) ./ R +// +// where: +// * is polynomial multiplication given sources in NTT form +// ./ is coefficient-wise division and R is Montgomery multiplier +// +// Requirements: +// - peSrc2, and all elements of pvSrc1 must be in ML-DSA NTT form +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorNTT( + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvSrc ); +// +// ML-DSA Vector NTT: +// pvSrc[i] = NTT(pvSrc[i]) for each element in pvSrc +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorINTT( + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvSrc ); +// +// ML-DSA Vector inverse NTT: +// pvSrc[i] = INTT(pvSrc[i]) for each element in pvSrc +// + + +////////////////////////////////////////////////////////////////////////// +// Matrix operations +////////////////////////////////////////////////////////////////////////// + +PSYMCRYPT_MLDSA_MATRIX +SYMCRYPT_CALL +SymCryptMlDsaMatrixCreate( + _Out_writes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer, + UINT8 nRows, + UINT8 nCols ); +// +// Initializes a matrix of nRows * nCols PolyElements in the given buffer. +// cbBuffer must be equal to SYMCRYPT_INTERNAL_MLDSA_SIZEOF_MATRIX( nRows, nCols ). 
+// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaMatrixVectorMontMul( + _In_ PCSYMCRYPT_MLDSA_MATRIX pmSrc1, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc2, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst, + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peTmp ); +// +// ML-DSA Matrix-Vector Montgomery Multiplication: +// pvDst = (pmSrc1 * pvSrc2) ./ R +// +// where: +// * is matrix-vector multiplication of polynomials in NTT form +// ./ is coefficient-wise division and R is Montgomery multiplier +// + + +////////////////////////////////////////////////////////////////////////// +// Sampling and rejection +////////////////////////////////////////////////////////////////////////// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaRejNttPoly( + _In_reads_( cbRejNttPolySeed ) PCBYTE pbRejNttPolySeed, + SIZE_T cbRejNttPolySeed, + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peDst ); +// +// RejNTTPoly from FIPS 204 +// Used by SymCryptMlDsaExpandA to generate each polynomial in the public matrix A from the expanded +// public seed. The output polynomial is in NTT form with coefficients modulo Q. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaExpandA( + _In_reads_( cbPublicSeed ) PCBYTE pbPublicSeed, + SIZE_T cbPublicSeed, + _Inout_ PSYMCRYPT_MLDSA_MATRIX pmA ); +// +// ExpandA from FIPS 204 +// Expands the public seed into the public matrix A. +// \hat{A}[i, j] = RejNttPoly(seed || j || i) for each index (i, j) in A +// + +INT8 +SYMCRYPT_CALL +SymCryptMlDsaCoeffFromHalfByte( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_range_( 0, 15 ) UINT8 halfByte ); +// +// CoeffFromHalfByte from FIPS 204 +// Converts a nibble (range [0, 15]) to a coefficient in the range [-eta, eta] +// If the nibble is outside of the valid private key range ([0, 14] for eta = 2, [0, 8] for eta = 4), +// returns INT8_MIN.
+// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaRejBoundedPoly( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_reads_( cbRejBoundedPolySeed ) PCBYTE pbRejBoundedPolySeed, + SIZE_T cbRejBoundedPolySeed, + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peDst ); +// +// RejBoundedPoly from FIPS 204 +// Used by SymCryptMlDsaExpandS to generate polynomials in the private vectors s1 and s2 from the +// expanded private vector seed. Coefficients in the output polynomial are modulo Q. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaExpandS( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_reads_( cbPrivateVectorSeed ) PCBYTE pbPrivateVectorSeed, + SIZE_T cbPrivateVectorSeed, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvs1, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvs2 ); +// +// ExpandS from FIPS 204 +// s1 = RejBoundedPoly(seed || i) for each index i in s1 (column vector) +// s2 = RejBoundedPoly(seed || i) for each index i in s2 (row vector) +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaSampleInBall( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_reads_( cbCommitmentHash ) PCBYTE pbCommitmentHash, + SIZE_T cbCommitmentHash, + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peChallenge ); +// +// SampleInBall from FIPS 204 +// Samples a polynomial c in R_q with coefficients in {-1, 0, 1} and Hamming weight tau. +// As with all polynomials, coefficients are represented as unsigned integers modulo Q. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaExpandMask( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _Inout_ PSYMCRYPT_SHAKE256_STATE pShakeState, + _In_reads_( cbPrivateRandom ) PCBYTE pbPrivateRandom, + SIZE_T cbPrivateRandom, + _In_ UINT16 counter, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvMask ); +// +// ExpandMask from FIPS 204 +// Samples a polynomial vector y in R^l such that each polynomial y[r] has coefficients between +// (-gamma_1 + 1, gamma_1) modulo Q, where gamma_1 == 2^(maskCoefficientRangeLog2) . The output +// vector is returned in NTT form. 
+// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaMakeHint( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvWMinusCs2, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvWMinusCs2PlusCt0, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst, + _Out_ UINT32* nBitsSet ); +// +// MakeHint from FIPS 204 +// Computes the hint vector. Each coefficient of the polynomials in the vector is a single bit +// indicating whether adding ct0 to (w - cs2) alters the high bits of the corresponding coefficient. +// We define our inputs differently from FIPS 204 to reduce computations: +// +// In FIPS 204, MakeHint is defined as: +// [[r1 != v1]] where r1 = HighBits(r), v1 = HighBits(r + z) +// +// ML-DSA.Sign_internal calls MakeHint with inputs: +// z = -ct0, r = w - cs2 + ct0 +// +// We can simplify this to: +// r1 = HighBits(w - cs2 + ct0), v1 = HighBits(w - cs2) +// +// Note that this function modifies the inputs in place for efficiency. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaUseHint( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvHint, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvCommitment ); +// +// UseHint from FIPS 204 +// Uses the hint vector to recalculate the original commitment vector from the approximated +// commitment vector by setting the high bits of the coefficients that were dropped in the +// approximation. On input, pvCommitment is the approximated commitment vector. On output, it is +// the recalculated original commitment vector. 
+// +// TODO osgvsowi/55435592 Consider decoding the hint just-in-time to avoid allocating an +// entire vector for it +// + +////////////////////////////////////////////////////////////////////////// +// Encoding/decoding +////////////////////////////////////////////////////////////////////////// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementEncode( + _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc, + UINT32 nBitsPerCoefficient, + UINT32 signedCoefficientBound, + _Out_writes_( nBitsPerCoefficient * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8) ) + PBYTE pbDst ); +// +// Encode a polynomial with coefficients in the range [0, 2^nBitsPerCoefficient] into a tightly +// packed byte array. +// +// Signed coefficients are encoded as described in the comment for +// SYMCRYPT_INTERNAL_MLDSA_SHORT_COEFFICIENT_ENCODE_DECODE. For these coefficients, the +// signedCoefficientBound parameter indicates the upper bound of the coefficients when they are +// positive, and is used to convert them from their internal representation modulo Q to the +// encoded representation. +// +// For polynomials whose coefficients are always positive and do not need any special encoding +// (e.g. t1), signedCoefficientBound must be 0. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaPolyElementDecode( + _In_reads_bytes_( nBitsPerCoefficient * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8) ) + PCBYTE pbSrc, + UINT32 nBitsPerCoefficient, + UINT32 signedCoefficientBound, + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peDst ); +// +// From a byte array that was previously encoded as described in SymCryptMlDsaPolyElementEncode, +// decode a polynomial with coefficients in the range [0, 2^nBitsPerCoefficient]. +// +// See comments on SymCryptMlDsaPolyElementEncode for information about how coefficients are +// encoded and decoded. 
+// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorEncode( + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc, + UINT32 nBitsPerCoefficient, + UINT32 signedCoefficientBound, + _Out_writes_( pvSrc->nElems * nBitsPerCoefficient * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8) ) + PBYTE pbDst ); +// +// Encodes a vector of polynomials into a tightly packed byte array. +// pbDst := SymCryptMlDsaPolyElementEncode(i) for each polynomial i in pvSrc +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaVectorDecode( + _In_reads_bytes_( pvDst->nElems * nBitsPerCoefficient * (SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8) ) + PCBYTE pbSrc, + UINT32 nBitsPerCoefficient, + UINT32 signedCoefficientBound, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst ); +// +// Decodes a vector of encoded polynomials from a byte array. +// pvDst[i] := SymCryptMlDsaPolyElementDecode(i) for each encoded polynomial i in pbSrc +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaPkEncode( + _In_ PCSYMCRYPT_MLDSAKEY pkMlDsakey, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); +// +// pkEncode(key) = rho || SimpleBitPack(t1) +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaPkDecode( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + UINT32 flags, + _Inout_ PSYMCRYPT_MLDSAKEY pkMlDsakey ); +// +// Decodes a public key from a byte array. The encoded public key only contains rho and t1. +// We recalculate the A matrix from rho. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSkEncode( + _In_ PCSYMCRYPT_MLDSAKEY pKey, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); +// +// skEncode(key) = rho || K || H(pkEncode(key)) || BitPack(s1) || BitPack(s2) || BitPack(t0) +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSkDecode( + _In_reads_( cbSrc ) PCBYTE pbSrc, + SIZE_T cbSrc, + UINT32 flags, + _Inout_ PSYMCRYPT_MLDSAKEY pKey ); +// +// Decodes a private key from a byte array. The encoded private key contains rho, K, s1, s2 and t0. +// We recalculate the A matrix from rho, t1 by recalculating A * s1 + s2 = t. 
This function also +// validates that the recalculated public key hash and t0 match the encoded values. If they do +// not, it returns SYMCRYPT_INVALID_BLOB. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaSigEncode( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_reads_( cbCommitmentHash ) PBYTE pbCommitmentHash, + SIZE_T cbCommitmentHash, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvResponse, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvHint, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ); +// +// SigEncode from FIPS 204 +// Encodes a signature into a tightly packed byte array. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaSigDecode( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_reads_( cbSig ) PCBYTE pbSig, + SIZE_T cbSig, + _Out_writes_( cbCommitmentHash) PBYTE pbCommitmentHash, + SIZE_T cbCommitmentHash, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvResponse, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvHint ); +// +// SigDecode from FIPS 204 +// Decodes a signature from a tightly packed byte array, producing the commitment hash, response +// vector, and hint vector. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaHintBitPack( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc, + _Out_writes_bytes_( pParams->nHintNonZeroCoeffs + pvSrc->nElems ) + PBYTE pbDst ); +// +// HintBitPack from FIPS 204 +// Packs the hint vector into a byte array. The first nHintNonZeroCoeffs bytes are the indices +// of non-zero coefficients in the vector, and the last nElems bytes contain the number of +// non-zero coefficients in polynomials 0..i of the vector. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaHintBitUnpack( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_reads_bytes_( pParams->nHintNonZeroCoeffs + pvDst->nElems ) + PCBYTE pbSrc, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst ); +// +// HintBitUnpack from FIPS 204 +// Unpacks the hint vector from a byte array where each byte indicates the index of a non-zero +// coefficient in the corresponding polynomial. 
See comment on SymCryptMlDsaHintBitPack for more +// details about encoding. +// + +////////////////////////////////////////////////////////////////////////// +// Auxiliary functions +////////////////////////////////////////////////////////////////////////// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlDsaGetInternalParamsFromParams( + SYMCRYPT_MLDSA_PARAMS params, + _Out_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS* pInternalParams ); +// +// Get the internal parameter structure corresponding to the given parameter set enum. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptHashMlDsaValidateHashAlgAndGetOid( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + SYMCRYPT_PQDSA_HASH_ID hashAlg, + SIZE_T cbHash, + _Out_ PCSYMCRYPT_OID* ppOid ); +// +// Validates that the given hash algorithm meets the required collision strength for the ML-DSA +// parameter set, as defined in FIPS 204. Also validates that cbHash matches the expected length +// for the hash algorithm, or for XOFs, is >= the required collision strength. +// See comments on the definition of SymCryptHashMlDsaSign +// + +INT32 +SYMCRYPT_CALL +SymCryptMlDsaModPlusMinus( UINT32 r, UINT32 modulus ); +// +// Helper function which implements the mod+- operation from FIPS 204. +// In FIPS 204, r0 := r mod+- 2^d where mod+- returns the unique element in (-(2^d/2), 2^d/2] +// which is congruent to r modulo 2^d. Importantly, this means that r0 may be negative. +// To use consistent data structures throughout our implementation and simplify modular +// arithmetic, we do not use negative numbers. Instead, we always represent negative values as +// UINT32s modulo Q. +// +// Requirements: r < modulus +// + +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaPolyElementInfinityNorm( _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc ); +// +// Returns the infinity norm of the given polynomial element as defined in FIPS 204. +// The infinity norm is the maximum absolute value of w mod+- Q for each coefficient w in the +// polynomial.
+// + +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaVectorInfinityNorm( _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc ); +// +// Returns the infinity norm of the given vector as defined in FIPS 204. +// = max(InfinityNorm(pvSrc[i])) for each polynomial in pvSrc +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaDecompose( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_range_(0, SYMCRYPT_MLDSA_Q - 1) UINT32 r, + _Out_opt_ UINT32 *puR1, + _Out_opt_ UINT32 *puR0 ); +// +// Decompose from FIPS 204 +// Decomposes r into (r1, r0) such that r1*2*gamma_2 + r0 is congruent to r modulo q +// See note above in SymCryptMlDsaModPlusMinus for important information about the +// representation of r0. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorHighBits( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst ); +// +// HighBits from FIPS 204 +// For each coefficient r of each polynomial in pvSrc, the corresponding coefficient in pvDst is +// set to *puR1 from Decompose(r). +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorLowBits( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst ); +// +// LowBits from FIPS 204 +// For each coefficient r of each polynomial in pvSrc, the corresponding coefficient in pvDst is +// set to *puR0 from Decompose(r). +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaPower2Round( + _In_range_(0, SYMCRYPT_MLDSA_Q - 1) UINT32 r, + _Out_ UINT32 *puR1, + _Out_ UINT32 *puR0 ); +// +// Power2Round from FIPS 204 +// Decomposes r into (r1, r0) such that r1*2^d + r0 is congruent to r modulo q +// See note above in SymCryptMlDsaModPlusMinus for important information about the +// representation of r0.
+// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaPolyElementPower2Round( + _In_ PCSYMCRYPT_MLDSA_POLYELEMENT peSrc, + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peDst1, + _Inout_ PSYMCRYPT_MLDSA_POLYELEMENT peDst0 ); +// +// (peDst1[i], peDst0[i]) = Power2Round(peSrc[i]) for each coefficient in peSrc +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaVectorPower2Round( + _In_ PCSYMCRYPT_MLDSA_VECTOR pvSrc, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst1, + _Inout_ PSYMCRYPT_MLDSA_VECTOR pvDst0 ); +// +// (pvDst1[i], pvDst0[i]) = Power2Round(pvSrc[i]) for each polynomial in pvSrc +// + +UINT32 +SYMCRYPT_CALL +SymCryptMlDsaSignedCoefficientModQ( INT32 coefficient ); +// +// Maps a signed short coefficient to a residue modulo Q. +// + +_Success_( return != NULL ) +PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES +SYMCRYPT_CALL +SymCryptMlDsaTemporariesAllocateAndInitialize( + _In_ PCSYMCRYPT_MLDSA_INTERNAL_PARAMS pParams, + UINT32 nRowVectors, + UINT32 nColVectors, + UINT32 nPolyElements, + UINT32 cbScratch ); +// +// Allocates and initializes a SYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES structure and +// returns a pointer to the caller. Returns NULL if allocation fails. +// + +VOID +SYMCRYPT_CALL +SymCryptMlDsaTemporariesFree( + _In_ _Post_invalid_ PSYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES pTemporaries ); +// +// Wipes and frees a SYMCRYPT_MLDSA_INTERNAL_COMPUTATION_TEMPORARIES structure previously allocated +// by SymCryptMlDsaTemporariesAllocateAndInitialize. +// diff --git a/libs/symcrypt/lib/sc_lib_mlkem.h b/libs/symcrypt/lib/sc_lib_mlkem.h new file mode 100644 index 00000000000..15c2ff91ce5 --- /dev/null +++ b/libs/symcrypt/lib/sc_lib_mlkem.h @@ -0,0 +1,468 @@ +// +// sc_lib_mlkem.h +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// Internal ML-KEM definitions for the symcrypt library. 
+// Always intended to be included as part of sc_lib.h +// + +//===================================================== +// ML-KEM internal high level types +// + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MLKEM_POLYELEMENT { + // PolyElements just store the coefficients without any header. + UINT16 coeffs[SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS]; +} SYMCRYPT_MLKEM_POLYELEMENT; +typedef SYMCRYPT_MLKEM_POLYELEMENT * PSYMCRYPT_MLKEM_POLYELEMENT; +typedef const SYMCRYPT_MLKEM_POLYELEMENT * PCSYMCRYPT_MLKEM_POLYELEMENT; + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR { + // PolyElement Accumulators just store the coefficients without any header. + UINT32 coeffs[SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS]; +} SYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR; +typedef SYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR * PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR; + +// Currently maximum size of MLKEM matrices is baked in, they are always square and up to 4x4. +#define SYMCRYPT_MLKEM_MATRIX_MAX_NROWS (4) + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MLKEM_VECTOR { + _Field_range_( 1, SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ) + UINT32 nRows; + UINT32 cbTotalSize; // Total size of the Vector + + // Followed by: + // nRows PolyElements +} SYMCRYPT_MLKEM_VECTOR, *PSYMCRYPT_MLKEM_VECTOR; +typedef const SYMCRYPT_MLKEM_VECTOR * PCSYMCRYPT_MLKEM_VECTOR; + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MLKEM_MATRIX { + _Field_range_( 1, SYMCRYPT_MLKEM_MATRIX_MAX_NROWS ) + UINT32 nRows; + UINT32 cbTotalSize; // Total size of the Matrix + + // Array of pointers to PolyElements in row-major order + PSYMCRYPT_MLKEM_POLYELEMENT apPolyElements[SYMCRYPT_MLKEM_MATRIX_MAX_NROWS * SYMCRYPT_MLKEM_MATRIX_MAX_NROWS]; + // Note: the extra indirection is intentional to make transposing the matrix cheap, + // given that in the MLKEM context the underlying PolyElements are relatively large + // so we don't want to move them around + + // Followed by: + // nRows*nRows PolyElements +} SYMCRYPT_MLKEM_MATRIX, 
*PSYMCRYPT_MLKEM_MATRIX; +typedef const SYMCRYPT_MLKEM_MATRIX * PCSYMCRYPT_MLKEM_MATRIX; + +// +// MLKEMKEY type +// + +#define SYMCRYPT_MLKEMKEY_MAX_SIZEOF_ENCODED_T (1536) + +typedef SYMCRYPT_ALIGN_STRUCT _SYMCRYPT_MLKEM_INTERNAL_PARAMS { + UINT32 params; // parameter set of ML-KEM being used, takes a value from SYMCRYPT_MLKEM_PARAMS + + UINT32 cbPolyElement; // size of one polynomial ring element + UINT32 cbVector; // size of one vector + UINT32 cbMatrix; // size of one matrix + + UINT8 nRows; // corresponds to k from FIPS 203; the number of rows and columns in the matrix A, + // and the number of rows in column vectors s and t + UINT8 nEta1; // corresponds to eta_1 from FIPS 203; number of coinflips used in generating s and e + // in keypair generation, and r in encapsulation + UINT8 nEta2; // corresponds to eta_2 from FIPS 203; number of coinflips used in generating e_1 and + // e_2 in encapsulation + UINT8 nBitsOfU; // corresponds to d_u from FIPS 203; number of bits that the coefficients of the polynomial + // ring elements of u are compressed to in encapsulation for encoding into ciphertext + UINT8 nBitsOfV; // corresponds to d_v from FIPS 203; number of bits that the coefficients of the polynomial + // ring element v is compressed to in encapsulation for encoding into ciphertext +} SYMCRYPT_MLKEM_INTERNAL_PARAMS, *PSYMCRYPT_MLKEM_INTERNAL_PARAMS; +typedef const SYMCRYPT_MLKEM_INTERNAL_PARAMS * PCSYMCRYPT_MLKEM_INTERNAL_PARAMS; + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT _SYMCRYPT_MLKEMKEY { + UINT32 fAlgorithmInfo; // Tracks which algorithms the key can be used in + // Also tracks which per-key selftests have been performed on this key + // A bitwise OR of SYMCRYPT_FLAG_KEY_*, SYMCRYPT_FLAG_MLKEMKEY_*, and + // SYMCRYPT_SELFTEST_KEY_* values + + SYMCRYPT_MLKEM_INTERNAL_PARAMS params; + + UINT32 cbTotalSize; // Total in-memory size of the ML-KEM key (this header and the following structs) + + BOOLEAN hasPrivateSeed; // Set to true if key has the private seed (d) 
+ BOOLEAN hasPrivateKey; // Set to true if key has the private key (s and z) + + // seeds + BYTE privateSeed[32]; // private seed (d) from which entire private PKE key can be derived + BYTE privateRandom[32]; // private random (z) used in implicit rejection + + BYTE publicSeed[32]; // public seed (rho) from which A can be derived + + // A o s + e = t + PSYMCRYPT_MLKEM_MATRIX pmAtranspose; // public matrix in NTT form (derived from publicSeed) + PSYMCRYPT_MLKEM_VECTOR pvt; // public vector in NTT form + + PSYMCRYPT_MLKEM_VECTOR pvs; // private vector in NTT form + + // misc fields + BYTE encodedT[SYMCRYPT_MLKEMKEY_MAX_SIZEOF_ENCODED_T]; // byte-encoding of public vector + // may only use a prefix of this buffer + BYTE encapsKeyHash[32]; // Precomputed value of hash of ML-KEM's byte-encoding of encapsulation key + + SYMCRYPT_MAGIC_FIELD + // Followed by: + // Atranspose + // t + // s +} SYMCRYPT_MLKEMKEY; + +//===================================================== +// ML-KEM primitives +// + +#define SYMCRYPT_MLKEM_Q (3329) + +#define SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT ( sizeof(SYMCRYPT_MLKEM_POLYELEMENT) ) +#define SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT_ACCUMULATOR ( sizeof(SYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR) ) +#define SYMCRYPT_INTERNAL_MLKEM_MAXIMUM_VECTOR_SIZE ( sizeof(SYMCRYPT_MLKEM_VECTOR) + (SYMCRYPT_MLKEM_MATRIX_MAX_NROWS * SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT) ) +#define SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT_OFFSET( _row ) ( sizeof(SYMCRYPT_MLKEM_VECTOR) + ((_row) * SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT) ) +#define SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT( _row, _pVector ) (PSYMCRYPT_MLKEM_POLYELEMENT)( (PBYTE)(_pVector) + SYMCRYPT_INTERNAL_MLKEM_VECTOR_ELEMENT_OFFSET(_row) ) + +#define SYMCRYPT_MLKEM_SIZEOF_MAX_CIPHERTEXT (1568UL) +#define SYMCRYPT_MLKEM_SIZEOF_AGREED_SECRET (32UL) +#define SYMCRYPT_MLKEM_SIZEOF_ENCAPS_RANDOM (32UL) + +typedef SYMCRYPT_ASYM_ALIGN_STRUCT 
_SYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES { + BYTE abVectorBuffer0[SYMCRYPT_INTERNAL_MLKEM_MAXIMUM_VECTOR_SIZE]; + BYTE abVectorBuffer1[SYMCRYPT_INTERNAL_MLKEM_MAXIMUM_VECTOR_SIZE]; + BYTE abPolyElementBuffer0[SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT]; + BYTE abPolyElementBuffer1[SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT]; + BYTE abPolyElementAccumulatorBuffer[SYMCRYPT_INTERNAL_MLKEM_SIZEOF_POLYRINGELEMENT_ACCUMULATOR]; + union { + SYMCRYPT_SHAKE128_STATE shake128State; + SYMCRYPT_SHAKE256_STATE shake256State; + SYMCRYPT_SHA3_256_STATE sha3_256State; + SYMCRYPT_SHA3_512_STATE sha3_512State; + } hashState0; + union { + SYMCRYPT_SHAKE128_STATE shake128State; + SYMCRYPT_SHAKE256_STATE shake256State; + SYMCRYPT_SHA3_256_STATE sha3_256State; + SYMCRYPT_SHA3_512_STATE sha3_512State; + } hashState1; +} SYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES; +typedef SYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES * PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES; + +// exposed here for KAT testing +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemEncapsulateInternal( + _In_ PCSYMCRYPT_MLKEMKEY pkMlKemkey, + _Out_writes_bytes_( cbAgreedSecret ) + PBYTE pbAgreedSecret, + SIZE_T cbAgreedSecret, + _Out_writes_bytes_( cbCiphertext ) + PBYTE pbCiphertext, + SIZE_T cbCiphertext, + _In_reads_bytes_( SYMCRYPT_MLKEM_SIZEOF_ENCAPS_RANDOM ) + PCBYTE pbRandom, + _Inout_ PSYMCRYPT_MLKEM_INTERNAL_COMPUTATION_TEMPORARIES pCompTemps ); + +PSYMCRYPT_MLKEM_POLYELEMENT +SYMCRYPT_CALL +SymCryptMlKemPolyElementCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer ); + +PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR +SYMCRYPT_CALL +SymCryptMlKemPolyElementAccumulatorCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer ); + +PSYMCRYPT_MLKEM_VECTOR +SYMCRYPT_CALL +SymCryptMlKemVectorCreate( + _Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer, + UINT32 nRows ); + +PSYMCRYPT_MLKEM_MATRIX +SYMCRYPT_CALL +SymCryptMlKemMatrixCreate( + 
_Out_writes_bytes_( cbBuffer ) PBYTE pbBuffer, + UINT32 cbBuffer, + UINT32 nRows ); + +// +// ML-KEM operations acting on individual polynomial ring elements (PolyElements) +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementMulAndAccumulate( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc2, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paDst ); +// +// ML-KEM Polynomial Ring Element multiply and add: +// paDst = paDst + (peSrc1 o peSrc2) +// where: +// o is polynomial multiplication given sources in NTT form +// +// Requirements: +// - peSrc1 and peSrc2 must be PolyElements in ML-KEM's NTT form +// - paDst must be in NTT form +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemMontgomeryReduceAndAddPolyElementAccumulatorToPolyElement( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paSrc, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ); +// +// Montgomery reduce and add a Polynomial Ring Element Accumulator to a Polynomial Ring +// Element, and wipe the accumulator: +// peDst = peDst + (paSrc ./ R) +// paSrc = 0 +// where: +// ./ is coefficient-wise division and R is Montgomery multiplier +// +// - One of the following conditions must be true: +// - paSrc must be pre-multiplied coefficient-wise by R for addition with a canonical +// representation of peDst +// - peDst must be coefficient-wise multiplied by the same constant factor as the +// result of (paSrc ./ R) for the addition to make sense +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementMulR( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ); +// +// ML-KEM Polynomial Ring Element multiply each coefficient by Montgomery multiplier R +// peDst = peSrc .* R +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementAdd( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc2, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ); +// +// ML-KEM Polynomial Ring Element addition +// peDst = peSrc1 + peSrc2 +// + +VOID
+SYMCRYPT_CALL +SymCryptMlKemPolyElementSub( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc1, + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc2, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ); +// +// ML-KEM Polynomial Ring Element subtract: +// peDst = peSrc1 - peSrc2 +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementNTT( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc ); +// +// ML-KEM Polynomial Ring Element NTT: +// peSrc = NTT(peSrc) per FIPS 203 +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementINTTAndMulR( + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peSrc ); +// +// ML-KEM Polynomial Ring Element INTT: +// peSrc = NTTinverse(peSrc) .* R +// where .* is coefficient-wise multiplication and R is Montgomery multiplier +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementCompressAndEncode( + _In_ PCSYMCRYPT_MLKEM_POLYELEMENT peSrc, + UINT32 nBitsPerCoefficient, + _Out_writes_bytes_(nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8)) + PBYTE pbDst ); +// +// ML-KEM Polynomial Ring Element Compress and Encode. +// +// Each coefficient in the ring element is Compressed to nBitsPerCoefficient using +// rounding logic specified in FIPS 203, and the coefficients are encoded +// (packed together densely as 256 contiguous bitfields) into the pbDst buffer. +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemPolyElementDecodeAndDecompress( + _In_reads_bytes_(nBitsPerCoefficient*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8)) + PCBYTE pbSrc, + UINT32 nBitsPerCoefficient, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ); +// +// ML-KEM Polynomial Ring Element Decode and Decompress. +// +// The pbSrc buffer is interpreted as an encoded ring element, with each coefficient +// being represented by nBitsPerCoefficient. The resulting ring element is written to +// peDst. 
+// + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementSampleNTTFromShake128( + _Inout_ PSYMCRYPT_SHAKE128_STATE pState, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ); +// +// Generates an ML-KEM Polynomial Ring Element in NTT form by extracting bytes from +// pre-instantiated SHAKE128 state. +// +// NOTE: we pass the SHAKE state to this function because we do not know up front +// how many bytes need to be extracted. +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemPolyElementSampleCBDFromBytes( + _In_reads_bytes_(eta*2*(SYMCRYPT_MLWE_POLYNOMIAL_COEFFICIENTS / 8) + 1) + PCBYTE pbSrc, + _In_range_(2,3) UINT32 eta, + _Out_ PSYMCRYPT_MLKEM_POLYELEMENT peDst ); +// +// Generates an ML-KEM Polynomial Ring Element in centered binomial distribution +// from input byte array. +// Each coefficient is generated using 2*eta bits. +// + + +// +// ML-KEM operations acting on Linear Algebra objects +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemMatrixTranspose( + _Inout_ PSYMCRYPT_MLKEM_MATRIX pmSrc ); +// +// pmSrc = transpose(pmSrc) +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemMatrixVectorMontMulAndAdd( + _In_ PCSYMCRYPT_MLKEM_MATRIX pmSrc1, + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc2, + _Inout_ PSYMCRYPT_MLKEM_VECTOR pvDst, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paTmp ); +// +// pvDst = ((pmSrc1 o pvSrc2) ./ R) + pvDst +// +// Remarks: +// - paTmp is used internally for temporary storage, it is wiped before and after use +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorMontDotProduct( + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc1, + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc2, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT peDst, + _Inout_ PSYMCRYPT_MLKEM_POLYELEMENT_ACCUMULATOR paTmp ); +// +// peDst = (pvSrc1 o pvSrc2) ./ R +// +// Remarks: +// - paTmp is used internally for temporary storage, it is wiped before and after use +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorSetZero( + _Inout_ PSYMCRYPT_MLKEM_VECTOR pvSrc ); +// +// pvSrc = 0 +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorMulR( + _In_ 
PCSYMCRYPT_MLKEM_VECTOR pvSrc, + _Out_ PSYMCRYPT_MLKEM_VECTOR pvDst ); +// +// pvDst = pvSrc .* R +// + + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorAdd( + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc1, + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc2, + _Out_ PSYMCRYPT_MLKEM_VECTOR pvDst ); +// +// pvDst = pvSrc1 + pvSrc2 +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorSub( + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc1, + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc2, + _Out_ PSYMCRYPT_MLKEM_VECTOR pvDst ); +// +// pvDst = pvSrc1 - pvSrc2 +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorNTT( + _Inout_ PSYMCRYPT_MLKEM_VECTOR pvSrc ); +// +// pvSrc = NTT(pvSrc) per FIPS 203 +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorINTTAndMulR( + _Inout_ PSYMCRYPT_MLKEM_VECTOR pvSrc ); +// +// pvSrc = NTTinverse(pvSrc) .* R +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemVectorCompressAndEncode( + _In_ PCSYMCRYPT_MLKEM_VECTOR pvSrc, + UINT32 nBitsPerCoefficient, + _Out_writes_bytes_(cbDst) PBYTE pbDst, + SIZE_T cbDst ); +// +// See ML-KEM Polynomial Ring Element Compress and Encode +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptMlKemVectorDecodeAndDecompress( + _In_reads_bytes_(cbSrc) PCBYTE pbSrc, + SIZE_T cbSrc, + UINT32 nBitsPerCoefficient, + _Out_ PSYMCRYPT_MLKEM_VECTOR pvDst ); +// +// See ML-KEM Polynomial Ring Element Decode and Decompress +// + +VOID +SYMCRYPT_CALL +SymCryptMlKemkeyWipePrivateState( + _Inout_ PSYMCRYPT_MLKEMKEY pkMlKemkey ); +// +// Wipes the ML-KEM key's private state. +// diff --git a/libs/symcrypt/lib/scsTools.c b/libs/symcrypt/lib/scsTools.c new file mode 100644 index 00000000000..3787d36adf6 --- /dev/null +++ b/libs/symcrypt/lib/scsTools.c @@ -0,0 +1,367 @@ +// +// scsTools.c Support tools for writing side-channel safe code +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// This code needs to process data in words, and we'd like to use 32-bit words on 32-bit +// architectures and 64-bit words on 64-bit architectures.
So we use NATIVE_UINT & friends. +// + +// Buffer limits for SymCryptScsRotateBuffer +#define MIN_BUFFER_SIZE (32) + +// +// Masking functions +// Masking functions can be more efficient if the inputs are restricted to values that can +// be represented in the signed data types. +// This is why we have some functions that take 31-bit inputs. +// + +// 31-bit inputs + +UINT32 +SYMCRYPT_CALL +SymCryptMask32IsNonzeroU31( UINT32 v ) +{ + SYMCRYPT_ASSERT( v < (1UL<<31) ); + return (-(INT32) v) >> 31; +} + +UINT32 +SYMCRYPT_CALL +SymCryptMask32IsZeroU31( UINT32 v ) +{ + return ~SymCryptMask32IsNonzeroU31( v ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptMask32NeqU31( UINT32 a, UINT32 b ) +{ + SYMCRYPT_ASSERT( a < (1UL<<31) ); + SYMCRYPT_ASSERT( b < (1UL<<31) ); + + return SymCryptMask32IsNonzeroU31( a ^ b ); +} + +UINT32 +SYMCRYPT_CALL +SymCryptMask32LtU31( UINT32 a, UINT32 b ) +{ + SYMCRYPT_ASSERT( a < (1UL<<31) ); + SYMCRYPT_ASSERT( b < (1UL<<31) ); + + // Casting to INT32 is defined as a and b are < 2^31 + return ((INT32) a - (INT32) b) >> 31; +} + + +// 32-bit inputs + +UINT32 +SYMCRYPT_CALL +SymCryptMask32EqU32( UINT32 a, UINT32 b ) +{ + return ~(UINT32) ( (-(INT64)(a^b)) >> 32); +} + + +// Other helper functions +SIZE_T +SYMCRYPT_CALL +SymCryptRoundUpPow2Sizet( SIZE_T v ) +{ + SIZE_T res; + + SYMCRYPT_ASSERT( v <= (SIZE_T_MAX / 2) + 1); + // If v is very large, then the result res might overflow. + // As SIZE_T is an unsigned type, the overflow is defined to + // be modulo 2^n for some n, and therefore we'll get res==0 + // which will terminate the loop.
+ + res = 1; + while( res < v ) + { + res += res; + + // Catch any overflows; should never happen but break to avoid infinite loop + if( res == 0 ) + { + break; + } + } + + return res; +} + + +// +// Copy data +// + +VOID +SYMCRYPT_CALL +SymCryptScsCopy( + _In_reads_( cbDst ) PCBYTE pbSrc, + SIZE_T cbSrc, + _Out_writes_( cbDst ) PBYTE pbDst, + SIZE_T cbDst ) +// Copy cbSrc bytes of pbSrc into pbDst without revealing cbSrc +// through side channels. +// - pbSrc/cbSrc: buffer to copy data from +// - pbDst/cbDst: buffer that receives the data +// Equivalent to: +// n = min( cbSrc, cbDst ) +// pbDst[ 0.. n-1 ] = pbSrc[ 0 .. n - 1 ] +// cbSrc is protected from side-channels; cbDst is public. +// Note that pbSrc must be cbDst bytes long, not cbSrc bytes. +{ + UINT32 i; + + SYMCRYPT_ASSERT( cbSrc <= (1UL << 31) && cbDst <= (1UL << 31) ); + + // Loop over the destination buffer and update each byte with the source data (if appropriate) + // pbSrc[ i ] is read for every i < cbDst, so the access pattern does not depend on cbSrc + for( i = 0; i < cbDst; i++ ) + { + pbDst[ i ] ^= (pbSrc[ i ] ^ pbDst[ i ]) & SymCryptMask32LtU31( i, (UINT32) cbSrc ); + } +} + + +// +// Buffer rotation +// To recover a message from an encoding with variable data position we have to do a copy from a +// variable memory location. But our memory access pattern cannot depend on the secret location. +// This code rotates a given buffer by a variable # bytes without revealing the shift amount. +// +// For efficiency we do this using NATIVE_UINT values so that we get the best performance on each platform. +// +// The first step is to rotate the array between 0 and NATIVE_BYTES-1 bytes to get the proper word alignment. +// After that we only have to rotate the words. +// We do this using a sequence of swaps. +// Notation: +// W[i] array of words, 0 <= i < n where n is the # words, a power of 2. +// s Rotation amount (to the left). The value in W[s] at the start should appear in W[0] at the end.
+// +// We use masked swaps as they seem to be more efficient than masked multiplexers. +// We can split this problem down recursively +// +// Function Rotate( W, n, s) +// - Rotate W[0..n/2-1] by s mod n/2 +// - Rotate W[n/2..n-1] by s mod n/2 +// for i in 0..n/2-1: +// swap W[i] and W[i+n/2] if (i+s) % n >= n/2 +// +// After the two half-sized rotates, each word is in the right position modulo n/2, so all that needs to be +// done is to possibly swap (W[i],W[i+n/2]) pairs. +// Let W' be the array after the half-sized rotates. We have +// W'[i] = W[ (i + s) % (n/2) ] for i in 0..n/2-1 +// In the final array W'' we should have W''[i] = W[ (i+s)%n ] +// So W''[i] = W'[i] when (i+s) % n = (i+s) % (n/2) which is equivalent to (i+s)%n < n/2. +// +// We turn this into a non-recursive algorithm. +// First we do rotations on 2 words, +// then the fixups to make it 4-word rotations, +// then on to 8-words, etc. +// At each level we compute the masks for the swaps once, and re-use them for each copy +// As a further optimization, we merge the 1st and 2nd pass into one to reduce the # read/writes +// +// We avoid using / and % throughout to avoid any instructions with data-dependent timing.
+// + +VOID +SYMCRYPT_CALL +SymCryptScsRotateBuffer( + _Inout_updates_( cbBuffer ) PBYTE pbBuffer, + SIZE_T cbBuffer, + SIZE_T lshift ) +{ + NATIVE_UINT * pBuf; + UINT32 n; + UINT32 a; + UINT32 b; + UINT32 i; + UINT32 j; + UINT32 blockSize; + UINT32 blockSizeLog; + UINT32 blockSizeLimit; + + NATIVE_UINT V; + NATIVE_UINT T; + NATIVE_UINT A; + NATIVE_UINT B; + NATIVE_UINT C; + NATIVE_UINT D; + NATIVE_UINT M; + NATIVE_UINT M0; + NATIVE_UINT M1; + + NATIVE_UINT Mask[ 16 ]; // Size must be a power of 2 + + SYMCRYPT_ASSERT( (cbBuffer & (cbBuffer - 1)) == 0 && cbBuffer >= MIN_BUFFER_SIZE ); + SYMCRYPT_ASSERT( lshift < cbBuffer ); + + pBuf = (NATIVE_UINT *) pbBuffer; + n = (UINT32)cbBuffer / NATIVE_BYTES; + + // First a rotate left by lshift % NATIVE_BYTES + // This is more complex because shifting by NATIVE_BITS is not a defined operation, and behavior is different + // on different CPUs. + + // Compute the shift amounts & mask + // M = 0 if lshift % NATIVE_BYTES == 0, -1 otherwise + a = 8 * (lshift & (NATIVE_BYTES-1)); // Core shift + M = (-(NATIVE_INT)a) >> (NATIVE_BITS - 1); // mask + b = (NATIVE_BITS - a) & (UINT32) M; // complementary shift, or 0 if it would be equal to NATIVE_BITS + + i = n; + V = pBuf[0]; + do{ + // Loop invariant: i > 0 && V = pBuf[i % n] from before any changes; + i--; + T = pBuf[i]; + pBuf[i] = T >> a | ((V << b) & M); + V = T; + } while( i > 0 ); + + // Now that the rotation is word-aligned, we can start our word rotation + lshift >>= NATIVE_BYTES_LOG2; // convert to # words to rotate.
+ + // We know we have at least 4 words, so we start with a pass to do 4-word rotations + SYMCRYPT_ASSERT( n >= 4 ); + + M = -(NATIVE_INT)(lshift & 1); + M0 = -(NATIVE_INT)( ((lshift + 0) >> 1) & 1 ); // (s + 0) mod 4 >= 2 + M1 = -(NATIVE_INT)( ((lshift + 1) >> 1) & 1 ); // (s + 1) mod 4 >= 2 + + for( i=0; i<n; i+=4 ) + { + A = pBuf[i]; + B = pBuf[i+1]; + C = pBuf[i+2]; + D = pBuf[i+3]; + + T = (A ^ B) & M; + A ^= T; + B ^= T; + + T = (C ^ D) & M; + C ^= T; + D ^= T; + + T = (A ^ C) & M0; + A ^= T; + C ^= T; + + T = (B ^ D) & M1; + B ^= T; + D ^= T; + + pBuf[i ] = A; + pBuf[i+1] = B; + pBuf[i+2] = C; + pBuf[i+3] = D; + } + + // Do the swaps using the mask array + blockSize = 4; // size of rotated blocks + blockSizeLog = 2; + + // + // Using the mask array is beneficial as long as the array is used twice or more + // Each swap loop processes 2 * blockSize of data, so the block size should never + // be larger than n/4 + blockSizeLimit = SYMCRYPT_MIN( SYMCRYPT_ARRAY_SIZE( Mask ), n/4 ); + while( blockSize <= blockSizeLimit ) + { + // Compute the masks for this level + for( i=0; i<blockSize; i++ ) + { + Mask[i] =-(NATIVE_INT)( ((i + lshift) >> blockSizeLog) & 1); + } + + // Now swap the elements of pairs of blocks according to the masks + for( i=0; i < n; i += 2 * blockSize ) + { + for( j=0; j < blockSize; j++ ) + { + A = pBuf[ i + j ]; + B = pBuf[ i + j + blockSize ]; + T = (A ^ B) & Mask[j]; + A ^= T; + B ^= T; + pBuf[ i + j ] = A; + pBuf[ i + j + blockSize ] = B; + } + } + blockSize *= 2; + blockSizeLog += 1; + } + + // Do the rest without using a mask array, either because we are only + // going to use each mask value once, or because we don't have a large-enough + // array + while( blockSize < n ) + { + // Now swap the elements of pairs of blocks according to the masks + for( i=0; i < n; i += 2 * blockSize ) + { + for( j=0; j < blockSize; j++ ) + { + M = -(NATIVE_INT)( ((j + lshift) >> blockSizeLog) & 1); + A = pBuf[ i + j ]; + B = pBuf[ i + j + blockSize ]; + T = (A
^ B) & M; + A ^= T; + B ^= T; + pBuf[ i + j ] = A; + pBuf[ i + j + blockSize ] = B; + } + } + blockSize *= 2; + blockSizeLog += 1; + } + +} + + +// +// Map values in a side-channel safe way, typically used for mapping error codes. +// +// (pcMap, nMap) point to an array of nMap entries of type SYMCRYPT_UINT32_MAP; +// each entry specifies a single mapping. If u32Input matches the +// 'from' field, the return value will be the 'to' field value. +// If u32Input is not equal to any 'from' field values, the return value is u32Default. +// Both u32Input and the return value are treated as secrets w.r.t. side channels. +// +// If multiple map entries have the same 'from' field value, then the return value +// is one of the several 'to' field values; which one is not defined. +// +// This function is particularly useful when mapping error codes in situations where +// the actual error cannot be revealed through side channels. +// + +UINT32 +SYMCRYPT_CALL +SymCryptMapUint32( + UINT32 u32Input, + UINT32 u32Default, + _In_reads_(nMap) PCSYMCRYPT_UINT32_MAP pcMap, + SIZE_T nMap) +{ + UINT32 mask; + UINT32 u32Output = u32Default; + + for (SIZE_T i = 0; i < nMap; ++i) + { + mask = SymCryptMask32EqU32(u32Input, pcMap[i].from); + u32Output ^= (u32Output ^ pcMap[i].to) & mask; + } + + return u32Output; +} diff --git a/libs/symcrypt/lib/selftest.c b/libs/symcrypt/lib/selftest.c new file mode 100644 index 00000000000..4921b343a59 --- /dev/null +++ b/libs/symcrypt/lib/selftest.c @@ -0,0 +1,17 @@ +// +// selftest.c +// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+// + +#include "precomp.h" + +const BYTE SymCryptTestMsg3 [ 3] = { 'a', 'b', 'c' }; + +const BYTE SymCryptTestKey32[32] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, +}; + +const BYTE SymCryptTestMsg16[16] = { + 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff +}; diff --git a/libs/symcrypt/lib/session.c b/libs/symcrypt/lib/session.c new file mode 100644 index 00000000000..71f59549d9b --- /dev/null +++ b/libs/symcrypt/lib/session.c @@ -0,0 +1,377 @@ +// +// session.c code for Session API implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionSenderInit( + _Inout_ PSYMCRYPT_SESSION pSession, + UINT32 senderId, + UINT32 flags ) +{ + // Make sure we only specify the correct flags + if (flags != 0) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + pSession->replayState.messageNumber = 0; + pSession->senderId = senderId; + pSession->flags = SYMCRYPT_FLAG_SESSION_ENCRYPT; + pSession->pMutex = NULL; + + return SYMCRYPT_NO_ERROR; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionReceiverInit( + _Inout_ PSYMCRYPT_SESSION pSession, + UINT32 senderId, + UINT32 flags ) +{ + PVOID pMutex = NULL; + + // Make sure we only specify the correct flags + if (flags != 0) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + +#if SYMCRYPT_CPU_AMD64 + if ( !SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_CMPXCHG16B ) ) + { + pMutex = SymCryptCallbackAllocateMutexFastInproc(); + if( pMutex == NULL ) + { + return SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + } + } +#elif SYMCRYPT_CPU_ARM64 // Arm64 always has support for CAS128 - so never need a lock +#else // 32b and generic platforms will always need to use a lock + pMutex = SymCryptCallbackAllocateMutexFastInproc(); + if( pMutex == NULL ) + { + return SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + } +#endif + 
pSession->pMutex = pMutex; + + // This represents that the message numbers 1-64 inclusive have not yet been successfully use in decryption + pSession->replayState.replayMask = 0; + pSession->replayState.messageNumber = 64; + + pSession->senderId = senderId; + pSession->flags = 0; + + return SYMCRYPT_NO_ERROR; +} + +VOID +SYMCRYPT_CALL +SymCryptSessionDestroy(_Inout_ PSYMCRYPT_SESSION pSession ) +{ + if ( pSession->pMutex != NULL ) + { + SymCryptCallbackFreeMutexFastInproc(pSession->pMutex); + } + SymCryptWipeKnownSize(pSession, sizeof(*pSession)); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionGcmEncrypt( + _Inout_ PSYMCRYPT_SESSION pSession, + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + _Out_writes_( cbTag ) PBYTE pbTag, + SIZE_T cbTag, + _Out_opt_ PUINT64 pu64MessageNumber ) +{ + BYTE nonce[12]; + UINT64 messageNumber; + + if ( (pSession->flags & SYMCRYPT_FLAG_SESSION_ENCRYPT) != SYMCRYPT_FLAG_SESSION_ENCRYPT ) + { + return SYMCRYPT_INVALID_ARGUMENT; + } + + messageNumber = SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(&pSession->replayState.messageNumber, 1); + + // We do not allow messageNumber to go above some maximum value (currently 2^64 - 2^32) + if ( messageNumber > SYMCRYPT_SESSION_MAX_MESSAGE_NUMBER ) + { + // Decrement the session messageNumber on the error path so that this session will continue + // to only generate errors + SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(&pSession->replayState.messageNumber, -1ll); + return SYMCRYPT_INVALID_ARGUMENT; + } + + SYMCRYPT_STORE_MSBFIRST32(&nonce[0], pSession->senderId); + SYMCRYPT_STORE_MSBFIRST64(&nonce[4], messageNumber); + + SymCryptGcmEncrypt( + pExpandedKey, + nonce, + sizeof(nonce), + pbAuthData, + cbAuthData, + pbSrc, + pbDst, + cbData, + pbTag, + cbTag); + + if( pu64MessageNumber != NULL ) + { + *pu64MessageNumber = messageNumber; + } + + return 
SYMCRYPT_NO_ERROR; +} + +// Convenience function used in SymCryptSessionDecryptUpdateState* +// +// Given an observedState check whether messageNumber represents a replay +// If it does, return SYMCRYPT_SESSION_REPLAY_FAILURE +// Otherwise, set desiredState to the observedState updated to represent messageNumber has been seen +// and return SYMCRYPT_NO_ERROR +FORCEINLINE +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionDecryptComputeDesiredReplayState( + _In_ PCSYMCRYPT_SESSION_REPLAY_STATE observedState, + _Out_ PSYMCRYPT_SESSION_REPLAY_STATE desiredState, + UINT64 messageNumber ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT64 messageMask; + UINT64 shiftAmount; + UINT64 shiftedMask; + + if ( messageNumber > observedState->messageNumber ) + { + // The observed message number is behind messageNumber that we want to mark successful + // Shift replayMask appropriately to preserve previously seen message numbers + shiftedMask = 0; + shiftAmount = messageNumber - observedState->messageNumber; + if( shiftAmount < 64 ) + { + shiftedMask = observedState->replayMask << shiftAmount; + } + // Mark messageNumber as seen in the replayMask + desiredState->replayMask = shiftedMask | 1; + desiredState->messageNumber = messageNumber; + } + else if ( messageNumber <= observedState->messageNumber - 64 ) + { + // The observed message number is too far ahead of messageNumber + // We cannot hope to succeed + scError = SYMCRYPT_SESSION_REPLAY_FAILURE; + goto cleanup; + } + else + { + // The observed message number is ahead of or equal to messageNumber + // Check if messageNumber has already been used + messageMask = 1ull << (observedState->messageNumber - messageNumber); // shiftAmount is in [0, 63] + if ((messageMask & observedState->replayMask) == messageMask) + { + scError = SYMCRYPT_SESSION_REPLAY_FAILURE; + goto cleanup; + } + // This is first time we have seen messageNumber - set the replayMask bit appropriately + desiredState->replayMask = observedState->replayMask | 
messageMask; + desiredState->messageNumber = observedState->messageNumber; + } + +cleanup: + return scError; +} + +#if SYMCRYPT_USE_CAS128 + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionDecryptUpdateStateCAS128( + _Inout_ PSYMCRYPT_SESSION pSession, + UINT64 messageNumber ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_SESSION_REPLAY_STATE expectedState; + SYMCRYPT_SESSION_REPLAY_STATE desiredState; + + // Non-atomic read of pSession's replayState. We can use this initial value as a good guess of + // the expected state, but we cannot fail based on it (as replayMask and messageNumber may have + // been read from different writes to the replayState) + expectedState = pSession->replayState; + + // Compute desiredState based on non-atomic read + // If it looks like this may be a replay, ensure we fail first CAS so we recompute desiredState + // from an atomic read in the loop below + if ( SymCryptSessionDecryptComputeDesiredReplayState(&expectedState, &desiredState, messageNumber) != SYMCRYPT_NO_ERROR ) + { + // pSession->replayState.messageNumber can never take the value 0 as it starts at 64 and is + // monotonic increasing + expectedState.messageNumber = 0; + } + + while( scError == SYMCRYPT_NO_ERROR ) + { + if ( SymCryptAtomicCas128Relaxed((PUINT64)&pSession->replayState, (PUINT64)&expectedState, (PUINT64)&desiredState) ) + { + // We succeeded in updating pSession->replayState and are done + break; + } + + // Compute new desiredState based on atomic read from CAS failure + // We may now correctly fall out of loop if a replay is detected + scError = SymCryptSessionDecryptComputeDesiredReplayState(&expectedState, &desiredState, messageNumber); + } + + return scError; +} + +#endif + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionDecryptUpdateStateLock( + _Inout_ PSYMCRYPT_SESSION pSession, + UINT64 messageNumber ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_SESSION_REPLAY_STATE desiredState; + + if ( pSession->pMutex == NULL ) + { + 
return SYMCRYPT_INVALID_ARGUMENT; + } + + // Check whether we are definitely too late to proceed before attempting to acquire mutex + // Do not need atomic read of full replayState here, but do need atomic 64b read of + // pSession->replayState.messageNumber + if ( messageNumber <= (UINT64) SYMCRYPT_ATOMIC_LOAD64_RELAXED(&pSession->replayState.messageNumber) - 64 ) + { + return SYMCRYPT_SESSION_REPLAY_FAILURE; + } + + SymCryptCallbackAcquireMutexFastInproc(pSession->pMutex); + ////// + // !!! Do not return until we have called SymCryptCallbackReleaseMutexFastInproc !!! + ////// + + scError = SymCryptSessionDecryptComputeDesiredReplayState(&pSession->replayState, &desiredState, messageNumber); + if ( scError == SYMCRYPT_NO_ERROR ) + { + pSession->replayState = desiredState; + } + + SymCryptCallbackReleaseMutexFastInproc(pSession->pMutex); + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionDecryptUpdateState( + _Inout_ PSYMCRYPT_SESSION pSession, + UINT64 messageNumber ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + +#if SYMCRYPT_CPU_AMD64 + if ( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_CMPXCHG16B ) ) + { + scError = SymCryptSessionDecryptUpdateStateCAS128( pSession, messageNumber ); + } + else + { + scError = SymCryptSessionDecryptUpdateStateLock( pSession, messageNumber ); + } +#elif SYMCRYPT_CPU_ARM64 // Arm64 always has support for CAS128 (possibly via LDXP + STXP) + scError = SymCryptSessionDecryptUpdateStateCAS128( pSession, messageNumber ); +#else // 32b and generic platforms will always need to use a lock + scError = SymCryptSessionDecryptUpdateStateLock( pSession, messageNumber ); +#endif + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSessionGcmDecrypt( + _Inout_ PSYMCRYPT_SESSION pSession, + UINT64 messageNumber, + _In_ PCSYMCRYPT_GCM_EXPANDED_KEY pExpandedKey, + _In_reads_opt_( cbAuthData ) PCBYTE pbAuthData, + SIZE_T cbAuthData, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE 
pbDst, + SIZE_T cbData, + _In_reads_( cbTag ) PCBYTE pbTag, + SIZE_T cbTag ) +{ + BYTE nonce[12]; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + if ( (pSession->flags & SYMCRYPT_FLAG_SESSION_ENCRYPT) != 0 ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Check for messageNumbers which are too high or not valid + if ( (messageNumber > SYMCRYPT_SESSION_MAX_MESSAGE_NUMBER) || (messageNumber == 0) ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Check whether we are definitely too late to proceed before attempting to acquire mutex + // Do not need atomic read of full replayState here, but do need atomic 64b read of + // pSession->replayState.messageNumber + if ( messageNumber <= (UINT64) SYMCRYPT_ATOMIC_LOAD64_RELAXED(&pSession->replayState.messageNumber) - 64 ) + { + scError = SYMCRYPT_SESSION_REPLAY_FAILURE; + goto cleanup; + } + + SYMCRYPT_STORE_MSBFIRST32(&nonce[0], pSession->senderId); + SYMCRYPT_STORE_MSBFIRST64(&nonce[4], messageNumber); + + scError = SymCryptGcmDecrypt( + pExpandedKey, + nonce, + sizeof(nonce), + pbAuthData, + cbAuthData, + pbSrc, + pbDst, + cbData, + pbTag, + cbTag); + + if ( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; // wipes pbDst twice, but we don't care about performance in the error case + } + + scError = SymCryptSessionDecryptUpdateState(pSession, messageNumber); + +cleanup: + if ( scError != SYMCRYPT_NO_ERROR ) + { + SymCryptWipe( pbDst, cbData ); + } + + return scError; +} diff --git a/libs/symcrypt/lib/sha1.c b/libs/symcrypt/lib/sha1.c new file mode 100644 index 00000000000..90711a9bfb3 --- /dev/null +++ b/libs/symcrypt/lib/sha1.c @@ -0,0 +1,472 @@ +// +// Sha1.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// This revised implementation is based on the older one in RSA32LIB by +// Scott Field and Dan Shumow. It is not based on any 3rd party code. 
+// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. +// + +const SYMCRYPT_HASH SymCryptSha1Algorithm_default = { + &SymCryptSha1Init, + &SymCryptSha1Append, + &SymCryptSha1Result, + &SymCryptSha1AppendBlocks, + &SymCryptSha1StateCopy, + sizeof( SYMCRYPT_SHA1_STATE ), + SYMCRYPT_SHA1_RESULT_SIZE, + SYMCRYPT_SHA1_INPUT_BLOCK_SIZE, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_SHA1_STATE, chain ), + SYMCRYPT_FIELD_SIZE( SYMCRYPT_SHA1_STATE, chain ), +}; + +const PCSYMCRYPT_HASH SymCryptSha1Algorithm = &SymCryptSha1Algorithm_default; + + +// +// The round constants used by SHA-1 +// +static const UINT32 Sha1K[4] = { + 0x5a827999UL, 0x6ed9eba1UL, 0x8f1bbcdcUL, 0xca62c1d6UL, +}; + +// +// Initial state +// +static const UINT32 sha1InitialState[5] = { + 0x67452301UL, + 0xefcdab89UL, + 0x98badcfeUL, + 0x10325476UL, + 0xc3d2e1f0UL, +}; + +// +// SymCryptSha1 +// +#define ALG SHA1 +#define Alg Sha1 +#include "hash_pattern.c" +#undef ALG +#undef Alg + + + + +// +// SymCryptSha1Init +// +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptSha1Init( _Out_ PSYMCRYPT_SHA1_STATE pState ) +{ + SYMCRYPT_SET_MAGIC( pState ); + + pState->dataLengthL = 0; + pState->dataLengthH = 0; + pState->bytesInBuffer = 0; + + memcpy( &pState->chain.H[0], &sha1InitialState[0], sizeof( sha1InitialState ) ); + + // + // There is no need to initialize the buffer part of the state as that will be + // filled before it is used. 
+ // +} + + +// +// SymCryptSha1Append +// +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptSha1Append( + _Inout_ PSYMCRYPT_SHA1_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SymCryptHashAppendInternal( SymCryptSha1Algorithm, (PSYMCRYPT_COMMON_HASH_STATE)pState, pbData, cbData ); +} + + +// +// SymCryptSha1Result +// +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptSha1Result( + _Inout_ PSYMCRYPT_SHA1_STATE pState, + _Out_writes_( SYMCRYPT_SHA1_RESULT_SIZE ) PBYTE pbResult ) +{ + UINT32 bytesInBuffer; + SIZE_T tmp; + + // + // SHA-1 uses almost the MD4 padding, except that the length in the padding is stored + // MSBFirst, rather than LSBFirst. + // As SHA-256 has a dedicated (fast) padding anyway, there is no gain to create a + // common padding routine for SHA-1 as it wouldn't be shared by anyone right now. + // + SYMCRYPT_CHECK_MAGIC( pState ); + + bytesInBuffer = (UINT32)(pState->bytesInBuffer); + + // + // The buffer is never completely full, so we can always put the first + // padding byte in. + // + pState->buffer[bytesInBuffer++] = 0x80; + + if( bytesInBuffer > 64-8 ) { + // + // No room for the rest of the padding. 
Pad with zeroes & process block + // bytesInBuffer is at most 64, so we do not have an integer underflow + // + memset( &pState->buffer[bytesInBuffer], 0, 64-bytesInBuffer ); + SymCryptSha1AppendBlocks( &pState->chain, pState->buffer, 64, &tmp ); + bytesInBuffer = 0; + } + + // + // Set rest of padding + // At this point bytesInBuffer <= 64-8, so we don't have an underflow + // We wipe to the end of the buffer as it is 16-aligned, + // and it is faster to wipe to an aligned point + // + memset( &pState->buffer[bytesInBuffer], 0, 64-bytesInBuffer ); + SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[64-8], pState->dataLengthL * 8 ); + + // + // Process the final block + // + SymCryptSha1AppendBlocks( &pState->chain, pState->buffer, 64, &tmp ); + + // + // Write the output in the correct byte order + // + SymCryptUint32ToMsbFirst( &pState->chain.H[0], pbResult, 5 ); + + // + // Wipe & re-initialize + // We have to wipe the whole state because the Init call + // might be optimized away by a smart compiler. + // + SymCryptWipeKnownSize( pState, sizeof( *pState ) ); + SymCryptSha1Init( pState ); +} + + +// +// For documentation on these function see FIPS 180-2 +// +// CH, MAJ and PARITY are the functions Ch, Maj, and Parity from the standard. +// +//#define CH( x, y, z ) (((x) & (y)) ^ ((~(x)) & (z))) +//#define MAJ( x, y, z ) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +#define MAJ( x, y, z ) ((((x) | (y)) & (z) ) | ((x) & (y))) +#define CH( x, y, z ) ((((z) ^ (y)) & (x)) ^ (z)) + +#define PARITY( x, y, z ) ((x) ^ (y) ^ (z) ) + + +// +// The values a-e are stored in an array called ae. +// We have unrolled the code completely. This makes both the indices into +// the ae array constant, and it makes the message addressing constant. 
+// + +// +// Initial round macro +// +// r is the round number +// ae[(r+0)%5] = e; +// ae[(r+1)%5] = d; +// ae[(r+2)%5] = c; +// ae[(r+3)%5] = b; +// ae[(r+4)%5] = a; +// After that incrementing the round number will automatically map a->b, b->c, etc. +// + +// +// The core round routine (excluding the message schedule) +// +// In more readable form this macro does the following: +// e = ROL(a,5) + F(b,c,d) + e + K[r/20] + W[round] +// b = ROL( b, 30 ) +// + +#define CROUND( a, b, c, d, e, r, F ) {\ + W[r%16] = Wt; \ + e += ROL32( a, 5 ) + F(b, c, d) + Sha1K[r/20] + Wt;\ + b = ROR32( b, 2 );\ +} + +#define IROUND( a, b, c, d, e, r, F ) { \ + Wt = SYMCRYPT_LOAD_MSBFIRST32( &pbData[ 4*r ] ); \ + CROUND( a, b, c, d, e, r, F ); \ +} + +// +// Subsequent rounds. +// This is the same as the IROUND except that it adds the message schedule, +// and takes the message word from the intermediate +// +#define FROUND( a, b, c, d, e, r, F ) { \ + Wt = ROL32( W[(r+13)%16] ^ W[(r+8)%16] ^ W[(r+2)%16] ^ W[r%16], 1 );\ + CROUND( a, b, c, d, e, r, F ); \ +} + +VOID +SYMCRYPT_CALL +SymCryptSha1AppendBlocks( + _Inout_ SYMCRYPT_SHA1_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ) +{ + + SYMCRYPT_ALIGN UINT32 W[16]; + UINT32 A, B, C, D, E; + UINT32 Wt; + + A = pChain->H[0]; + B = pChain->H[1]; + C = pChain->H[2]; + D = pChain->H[3]; + E = pChain->H[4]; + + while( cbData >= 64 ) + { + // + // initial rounds 1 to 16 + // + + IROUND( A, B, C, D, E, 0, CH ); + IROUND( E, A, B, C, D, 1, CH ); + IROUND( D, E, A, B, C, 2, CH ); + IROUND( C, D, E, A, B, 3, CH ); + IROUND( B, C, D, E, A, 4, CH ); + IROUND( A, B, C, D, E, 5, CH ); + IROUND( E, A, B, C, D, 6, CH ); + IROUND( D, E, A, B, C, 7, CH ); + IROUND( C, D, E, A, B, 8, CH ); + IROUND( B, C, D, E, A, 9, CH ); + IROUND( A, B, C, D, E, 10, CH ); + IROUND( E, A, B, C, D, 11, CH ); + IROUND( D, E, A, B, C, 12, CH ); + IROUND( C, D, E, A, B, 13, CH ); + IROUND( B, C, D, E, A, 14, CH 
); + IROUND( A, B, C, D, E, 15, CH ); + + // + // Full rounds (including msg expansion) from here on + // + FROUND( E, A, B, C, D, 16, CH ); + FROUND( D, E, A, B, C, 17, CH ); + FROUND( C, D, E, A, B, 18, CH ); + FROUND( B, C, D, E, A, 19, CH ); + + + FROUND( A, B, C, D, E, 20, PARITY ); + FROUND( E, A, B, C, D, 21, PARITY ); + FROUND( D, E, A, B, C, 22, PARITY ); + FROUND( C, D, E, A, B, 23, PARITY ); + FROUND( B, C, D, E, A, 24, PARITY ); + FROUND( A, B, C, D, E, 25, PARITY ); + FROUND( E, A, B, C, D, 26, PARITY ); + FROUND( D, E, A, B, C, 27, PARITY ); + FROUND( C, D, E, A, B, 28, PARITY ); + FROUND( B, C, D, E, A, 29, PARITY ); + FROUND( A, B, C, D, E, 30, PARITY ); + FROUND( E, A, B, C, D, 31, PARITY ); + FROUND( D, E, A, B, C, 32, PARITY ); + FROUND( C, D, E, A, B, 33, PARITY ); + FROUND( B, C, D, E, A, 34, PARITY ); + FROUND( A, B, C, D, E, 35, PARITY ); + FROUND( E, A, B, C, D, 36, PARITY ); + FROUND( D, E, A, B, C, 37, PARITY ); + FROUND( C, D, E, A, B, 38, PARITY ); + FROUND( B, C, D, E, A, 39, PARITY ); + + + FROUND( A, B, C, D, E, 40, MAJ ); + FROUND( E, A, B, C, D, 41, MAJ ); + FROUND( D, E, A, B, C, 42, MAJ ); + FROUND( C, D, E, A, B, 43, MAJ ); + FROUND( B, C, D, E, A, 44, MAJ ); + FROUND( A, B, C, D, E, 45, MAJ ); + FROUND( E, A, B, C, D, 46, MAJ ); + FROUND( D, E, A, B, C, 47, MAJ ); + FROUND( C, D, E, A, B, 48, MAJ ); + FROUND( B, C, D, E, A, 49, MAJ ); + FROUND( A, B, C, D, E, 50, MAJ ); + FROUND( E, A, B, C, D, 51, MAJ ); + FROUND( D, E, A, B, C, 52, MAJ ); + FROUND( C, D, E, A, B, 53, MAJ ); + FROUND( B, C, D, E, A, 54, MAJ ); + FROUND( A, B, C, D, E, 55, MAJ ); + FROUND( E, A, B, C, D, 56, MAJ ); + FROUND( D, E, A, B, C, 57, MAJ ); + FROUND( C, D, E, A, B, 58, MAJ ); + FROUND( B, C, D, E, A, 59, MAJ ); + + FROUND( A, B, C, D, E, 60, PARITY ); + FROUND( E, A, B, C, D, 61, PARITY ); + FROUND( D, E, A, B, C, 62, PARITY ); + FROUND( C, D, E, A, B, 63, PARITY ); + FROUND( B, C, D, E, A, 64, PARITY ); + FROUND( A, B, C, D, E, 65, PARITY ); + FROUND( 
E, A, B, C, D, 66, PARITY ); + FROUND( D, E, A, B, C, 67, PARITY ); + FROUND( C, D, E, A, B, 68, PARITY ); + FROUND( B, C, D, E, A, 69, PARITY ); + FROUND( A, B, C, D, E, 70, PARITY ); + FROUND( E, A, B, C, D, 71, PARITY ); + FROUND( D, E, A, B, C, 72, PARITY ); + FROUND( C, D, E, A, B, 73, PARITY ); + FROUND( B, C, D, E, A, 74, PARITY ); + FROUND( A, B, C, D, E, 75, PARITY ); + FROUND( E, A, B, C, D, 76, PARITY ); + FROUND( D, E, A, B, C, 77, PARITY ); + FROUND( C, D, E, A, B, 78, PARITY ); + FROUND( B, C, D, E, A, 79, PARITY ); + + + pChain->H[0] = A = A + pChain->H[0]; + pChain->H[1] = B = B + pChain->H[1]; + pChain->H[2] = C = C + pChain->H[2]; + pChain->H[3] = D = D + pChain->H[3]; + pChain->H[4] = E = E + pChain->H[4]; + + pbData += 64; + cbData -= 64; + } + + *pcbRemaining = cbData; + + // + // Wipe the variables; + // + SymCryptWipeKnownSize( W, sizeof( W ) ); + SYMCRYPT_FORCE_WRITE32( &A, 0 ); + SYMCRYPT_FORCE_WRITE32( &B, 0 ); + SYMCRYPT_FORCE_WRITE32( &C, 0 ); + SYMCRYPT_FORCE_WRITE32( &D, 0 ); + SYMCRYPT_FORCE_WRITE32( &E, 0 ); + SYMCRYPT_FORCE_WRITE32( &Wt, 0 ); +} + + +VOID +SYMCRYPT_CALL +SymCryptSha1StateExport( + _In_ PCSYMCRYPT_SHA1_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA1_STATE_EXPORT_SIZE ) PBYTE pbBlob ) +{ + SYMCRYPT_ALIGN SYMCRYPT_SHA1_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. + C_ASSERT( sizeof( blob ) == SYMCRYPT_SHA1_STATE_EXPORT_SIZE ); + + SYMCRYPT_CHECK_MAGIC( pState ); + + SymCryptWipeKnownSize( &blob, sizeof( blob ) ); // wipe to avoid any data leakage + + blob.header.magic = SYMCRYPT_BLOB_MAGIC; + blob.header.size = SYMCRYPT_SHA1_STATE_EXPORT_SIZE; + blob.header.type = SymCryptBlobTypeSha1State; + + // + // Copy the relevant data. Buffer will be 0-padded. 
+ // + + SymCryptUint32ToMsbFirst( &pState->chain.H[0], &blob.chain[0], 5 ); + blob.dataLength = pState->dataLengthL; + memcpy( &blob.buffer[0], &pState->buffer[0], blob.dataLength & 0x3f ); + + SYMCRYPT_ASSERT( (PCBYTE) &blob + sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ) == (PCBYTE) &blob.trailer ); + SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), &blob.trailer.checksum[0] ); + + memcpy( pbBlob, &blob, sizeof( blob ) ); + +//cleanup: + SymCryptWipeKnownSize( &blob, sizeof( blob ) ); + return; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha1StateImport( + _Out_ PSYMCRYPT_SHA1_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA1_STATE_EXPORT_SIZE) PCBYTE pbBlob ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_ALIGN SYMCRYPT_SHA1_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. + BYTE checksum[8]; + + C_ASSERT( sizeof( blob ) == SYMCRYPT_SHA1_STATE_EXPORT_SIZE ); + memcpy( &blob, pbBlob, sizeof( blob ) ); + + if( blob.header.magic != SYMCRYPT_BLOB_MAGIC || + blob.header.size != SYMCRYPT_SHA1_STATE_EXPORT_SIZE || + blob.header.type != SymCryptBlobTypeSha1State ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), checksum ); + if( memcmp( checksum, &blob.trailer.checksum[0], 8 ) != 0 ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + SymCryptMsbFirstToUint32( &blob.chain[0], &pState->chain.H[0], 5 ); + pState->dataLengthL = blob.dataLength; + pState->dataLengthH = 0; + pState->bytesInBuffer = blob.dataLength & 0x3f; + memcpy( &pState->buffer[0], &blob.buffer[0], pState->bytesInBuffer ); + + SYMCRYPT_SET_MAGIC( pState ); + +cleanup: + SymCryptWipeKnownSize( &blob, sizeof(blob) ); + return scError; +} + + + +// +// Simple test vector for FIPS module testing +// + +static const BYTE sha1KATAnswer[ 20 ] = { + 0xa9, 0x99, 0x3e, 0x36, + 0x47, 
0x06, 0x81, 0x6a, + 0xba, 0x3e, 0x25, 0x71, + 0x78, 0x50, 0xc2, 0x6c, + 0x9c, 0xd0, 0xd8, 0x9d + } ; + +VOID +SYMCRYPT_CALL +SymCryptSha1Selftest(void) +{ + BYTE result[SYMCRYPT_SHA1_RESULT_SIZE]; + + SymCryptSha1( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result ); + + SymCryptInjectError( result, sizeof( result ) ); + + if( memcmp( result, sha1KATAnswer, sizeof( result ) ) != 0 ) { + SymCryptFatal( 'SHA1' ); + } +} diff --git a/libs/symcrypt/lib/sha256-xmm.c b/libs/symcrypt/lib/sha256-xmm.c new file mode 100644 index 00000000000..c181c4824a2 --- /dev/null +++ b/libs/symcrypt/lib/sha256-xmm.c @@ -0,0 +1,354 @@ +#include "precomp.h" + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("ssse3"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("ssse3") +#endif + +extern SYMCRYPT_ALIGN_AT(256) const UINT32 SymCryptSha256K[64]; + + +// Endianness transformation for 4 32-bit values in an XMM register +const SYMCRYPT_ALIGN_AT(16) UINT32 BYTE_REVERSE_32[4] = { + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f, +}; + +// Shuffle 32-bit words in an XMM register: W3 W2 W1 W0 -> 0 0 W2 W0 +// Used by the SSSE3 assembly implementation +const SYMCRYPT_ALIGN_AT(16) UINT32 XMM_PACKLOW[4] = { + 0x03020100, 0x0b0a0908, 0x80808080, 0x80808080, +}; + +// Shuffle 32-bit words in an XMM register: W3 W2 W1 W0 -> W2 W0 0 0 +// Used by the SSSE3 assembly implementation +const SYMCRYPT_ALIGN_AT(16) UINT32 XMM_PACKHIGH[4] = { + 0x80808080, 0x80808080, 0x03020100, 0x0b0a0908, +}; + + +#if SYMCRYPT_MS_VC && !defined(__clang__) +#define RORX_U32 _rorx_u32 +#define RORX_U64 _rorx_u64 +#else +// TODO: implement _rorx functions for clang +#define RORX_U32 ROR32 +#define RORX_U64 ROR64 +#endif // SYMCRYPT_MS_VC + + +// +// For documentation on these function see FIPS 180-2 +// +// MAJ and CH are the functions Maj and Ch from the standard. +// CSIGMA0 and CSIGMA1 are the capital sigma functions. 
+// LSIGMA0 and LSIGMA1 are the lowercase sigma functions. +// +// The canonical definitions of the MAJ and CH functions are: +//#define MAJ( x, y, z ) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +//#define CH( x, y, z ) (((x) & (y)) ^ ((~(x)) & (z))) +// We use optimized versions defined below +// +#define MAJ( x, y, z ) ((((z) | (y)) & (x) ) | ((z) & (y))) +#define CH( x, y, z ) ((((z) ^ (y)) & (x)) ^ (z)) + +#define LSIGMA0( x ) (ROR32((x), 7) ^ ROR32((x), 18) ^ ((x)>> 3)) +#define LSIGMA1( x ) (ROR32((x), 17) ^ ROR32((x), 19) ^ ((x)>>10)) + +#define CSIGMA0(x) (RORX_U32(x, 2) ^ RORX_U32(x, 13) ^ RORX_U32(x, 22)) +#define CSIGMA1(x) (RORX_U32(x, 6) ^ RORX_U32(x, 11) ^ RORX_U32(x, 25)) + + +#define LSIGMA0XMM( x ) \ + _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \ + _mm_slli_epi32(x,25) , _mm_srli_epi32(x, 7) ),\ + _mm_slli_epi32(x,14) ), _mm_srli_epi32(x, 18) ),\ + _mm_srli_epi32(x, 3) ) +#define LSIGMA1XMM( x ) \ + _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \ + _mm_slli_epi32(x,15) , _mm_srli_epi32(x, 17) ),\ + _mm_slli_epi32(x,13) ), _mm_srli_epi32(x, 19) ),\ + _mm_srli_epi32(x,10) ) + + + +// Initial loading of message words and endianness transformation. +// bl : The number of blocks to load, 1 <= bl <= 4. +// +// When bl < 4, the high order lanes of the XMM registers corresponding to the missing blocks are unused. 
+// +#define SHA256_MSG_LOAD_4BLOCKS(bl) { \ + for(SIZE_T i = 0; i < bl; i++) \ + { \ + Wx.xmm[i + 0] = _mm_shuffle_epi8(_mm_loadu_si128((__m128i*) &pbData[i * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE + 0]), kBYTE_REVERSE_32); \ + Wx.xmm[i + 4] = _mm_shuffle_epi8(_mm_loadu_si128((__m128i*) &pbData[i * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE + 16]), kBYTE_REVERSE_32); \ + Wx.xmm[i + 8] = _mm_shuffle_epi8(_mm_loadu_si128((__m128i*) &pbData[i * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE + 32]), kBYTE_REVERSE_32); \ + Wx.xmm[i + 12] = _mm_shuffle_epi8(_mm_loadu_si128((__m128i*) &pbData[i * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE + 48]), kBYTE_REVERSE_32); \ + } \ +} + +// Shuffles the initially loaded message words from multiple blocks +// so that each XMM register contains message words with the same index +// within a block (e.g. Wx.xmm[0] contains the first words of each block). +// +// We have to use this macro four times to transform the message blocks of 64-bytes. +// ind=0 processes the first quarter (16-bytes), ind=1 does the second quarter and so on. +// +#define SHA256_MSG_TRANSPOSE_QUARTER_4BLOCKS(ind) { \ + __m128i t1, t2, t3, t4; \ + t1 = _mm_unpacklo_epi32(Wx.xmm[4 * (ind) + 0], Wx.xmm[4 * (ind) + 1]); \ + t2 = _mm_unpacklo_epi32(Wx.xmm[4 * (ind) + 2], Wx.xmm[4 * (ind) + 3]); \ + t3 = _mm_unpackhi_epi32(Wx.xmm[4 * (ind) + 0], Wx.xmm[4 * (ind) + 1]); \ + t4 = _mm_unpackhi_epi32(Wx.xmm[4 * (ind) + 2], Wx.xmm[4 * (ind) + 3]); \ + Wx.xmm[4 * (ind) + 0] = _mm_unpacklo_epi64(t1, t2); \ + Wx.xmm[4 * (ind) + 1] = _mm_unpackhi_epi64(t1, t2); \ + Wx.xmm[4 * (ind) + 2] = _mm_unpacklo_epi64(t3, t4); \ + Wx.xmm[4 * (ind) + 3] = _mm_unpackhi_epi64(t3, t4); \ +} + +#define SHA256_MSG_TRANSPOSE_4BLOCKS() { \ + SHA256_MSG_TRANSPOSE_QUARTER_4BLOCKS(0); \ + SHA256_MSG_TRANSPOSE_QUARTER_4BLOCKS(1); \ + SHA256_MSG_TRANSPOSE_QUARTER_4BLOCKS(2); \ + SHA256_MSG_TRANSPOSE_QUARTER_4BLOCKS(3); \ +} + +// One round message schedule, updates the rth message word. 
( 16 <= r < 64 ) +// Also adds the constants for round (r-16). +#define SHA256_MSG_EXPAND_4BLOCKS_1ROUND(r) { \ + Wx.xmm[r] = _mm_add_epi32(_mm_add_epi32(_mm_add_epi32(Wx.xmm[r - 16], Wx.xmm[r - 7]), \ + LSIGMA0XMM(Wx.xmm[r - 15])), LSIGMA1XMM(Wx.xmm[r - 2])); \ + Wx.xmm[r - 16] = _mm_add_epi32(Wx.xmm[r - 16], _mm_set1_epi32(SymCryptSha256K[r - 16])); \ +} + +// Four rounds of message schedule. Generates message words for rounds r, r+1, r+2, r+3. +#define SHA256_MSG_EXPAND_4BLOCKS_4ROUNDS(r) { \ + SHA256_MSG_EXPAND_4BLOCKS_1ROUND((r) + 0); SHA256_MSG_EXPAND_4BLOCKS_1ROUND((r) + 1); \ + SHA256_MSG_EXPAND_4BLOCKS_1ROUND((r) + 2); SHA256_MSG_EXPAND_4BLOCKS_1ROUND((r) + 3); \ +} +// Sixteen rounds of message schedule. Generates message words for rounds r, ..., r+15. +#define SHA256_MSG_EXPAND_4BLOCKS_16ROUNDS(r) { \ + SHA256_MSG_EXPAND_4BLOCKS_4ROUNDS((r) + 0); SHA256_MSG_EXPAND_4BLOCKS_4ROUNDS((r) + 4); \ + SHA256_MSG_EXPAND_4BLOCKS_4ROUNDS((r) + 8); SHA256_MSG_EXPAND_4BLOCKS_4ROUNDS((r) + 12); \ +} + +// Core round function using message words from Wx array. +// Wx contains -interleaved- expanded message words from b blocks. +// i.e. Message words for round r for each block, followed by the message words for round (r+1) for each block. 
+// +// r16 : round number mod 16 +// rb : base round number so that (rb+r16) gives the actual round number +// b : message block index, b = 0..3 +#define CROUND_4BLOCKS(r16, rb, b) { \ + Wt = Wx.ul4[(rb)+(r16)][b]; \ + ah[ r16 &7] += CSIGMA1(ah[(r16+3)&7]) + CH(ah[(r16+3)&7], ah[(r16+2)&7], ah[(r16+1)&7]) + Wt;\ + ah[(r16+4)&7] += ah[r16 &7];\ + ah[ r16 &7] += CSIGMA0(ah[(r16+7)&7]) + MAJ(ah[(r16+7)&7], ah[(r16+6)&7], ah[(r16+5)&7]);\ +} + +// +// Core round function +// +// r16 : round number mod 16 +// r : round number, r = 0..63 +// +#define CROUND( r16, r ) {;\ + ah[ r16 &7] += CSIGMA1(ah[(r16+3)&7]) + CH(ah[(r16+3)&7], ah[(r16+2)&7], ah[(r16+1)&7]) + SymCryptSha256K[r] + Wt;\ + ah[(r16+4)&7] += ah[r16 &7];\ + ah[ r16 &7] += CSIGMA0(ah[(r16+7)&7]) + MAJ(ah[(r16+7)&7], ah[(r16+6)&7], ah[(r16+5)&7]);\ +} + +// +// Initial round that reads the message. +// r is the round number 0..15 +// +#define IROUND( r ) {\ + Wt = SYMCRYPT_LOAD_MSBFIRST32( &pbData[ 4*r ] );\ + Wx.ul[r] = Wt; \ + CROUND(r,r);\ +} + +// +// Subsequent rounds. +// r16 is the round number mod 16. rb is the round number minus r16. +// +#define FROUND(r16, rb) { \ + Wt = LSIGMA1( Wx.ul[(r16-2) & 15] ) + Wx.ul[(r16-7) & 15] + \ + LSIGMA0( Wx.ul[(r16-15) & 15]) + Wx.ul[r16 & 15]; \ + Wx.ul[r16] = Wt; \ + CROUND( r16, r16+rb ); \ +} + + + +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_xmm_4blocks( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE* pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T* pcbRemaining) +{ + + SYMCRYPT_ALIGN union { UINT32 ul[16]; UINT32 ul4[64][4]; __m128i xmm[64]; } Wx; + SYMCRYPT_ALIGN UINT32 ah[8]; + UINT32 Wt; + SIZE_T uWipeSize = (cbData >= (3 * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE)) ? 
(64 * 4 * sizeof(UINT32)) : (16 * sizeof(UINT32)); + + const __m128i kBYTE_REVERSE_32 = _mm_load_si128((const __m128i*)BYTE_REVERSE_32); + + while (cbData >= (3 * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE)) + { + // If we have 4 or more blocks then process 4, else process whatever is left. + SIZE_T numBlocks = (cbData >= 4 * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE) ? 4 : (cbData / SYMCRYPT_SHA256_INPUT_BLOCK_SIZE); + + SHA256_MSG_LOAD_4BLOCKS(numBlocks); + SHA256_MSG_TRANSPOSE_4BLOCKS(); + + for (int j = 16; j < 64; j += 16) + { + SHA256_MSG_EXPAND_4BLOCKS_16ROUNDS(j); + } + + // Constants up to r=48 were added during message expansion. Add the remaining ones here. + for (int i = 48; i < 64; i++) + { + Wx.xmm[i] = _mm_add_epi32(Wx.xmm[i], _mm_set1_epi32(SymCryptSha256K[i])); + } + + for (SIZE_T bl = 0; bl < numBlocks; bl++) + { + ah[7] = pChain->H[0]; + ah[6] = pChain->H[1]; + ah[5] = pChain->H[2]; + ah[4] = pChain->H[3]; + ah[3] = pChain->H[4]; + ah[2] = pChain->H[5]; + ah[1] = pChain->H[6]; + ah[0] = pChain->H[7]; + + for (int iterCount = 0; iterCount < (64/8); iterCount++) + { + const int roundBase = iterCount*8; + CROUND_4BLOCKS( 0, roundBase, bl); + CROUND_4BLOCKS( 1, roundBase, bl); + CROUND_4BLOCKS( 2, roundBase, bl); + CROUND_4BLOCKS( 3, roundBase, bl); + CROUND_4BLOCKS( 4, roundBase, bl); + CROUND_4BLOCKS( 5, roundBase, bl); + CROUND_4BLOCKS( 6, roundBase, bl); + CROUND_4BLOCKS( 7, roundBase, bl); + //CROUND_4BLOCKS( 8, roundBase, bl); + //CROUND_4BLOCKS( 9, roundBase, bl); + //CROUND_4BLOCKS(10, roundBase, bl); + //CROUND_4BLOCKS(11, roundBase, bl); + //CROUND_4BLOCKS(12, roundBase, bl); + //CROUND_4BLOCKS(13, roundBase, bl); + //CROUND_4BLOCKS(14, roundBase, bl); + //CROUND_4BLOCKS(15, roundBase, bl); + } + + pChain->H[0] = ah[7] + pChain->H[0]; + pChain->H[1] = ah[6] + pChain->H[1]; + pChain->H[2] = ah[5] + pChain->H[2]; + pChain->H[3] = ah[4] + pChain->H[3]; + pChain->H[4] = ah[3] + pChain->H[4]; + pChain->H[5] = ah[2] + pChain->H[5]; + pChain->H[6] = ah[1] + 
pChain->H[6]; + pChain->H[7] = ah[0] + pChain->H[7]; + } + + pbData += (numBlocks * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE); + cbData -= (numBlocks * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE); + } + + + while (cbData >= SYMCRYPT_SHA256_INPUT_BLOCK_SIZE) + { + ah[7] = pChain->H[0]; + ah[6] = pChain->H[1]; + ah[5] = pChain->H[2]; + ah[4] = pChain->H[3]; + ah[3] = pChain->H[4]; + ah[2] = pChain->H[5]; + ah[1] = pChain->H[6]; + ah[0] = pChain->H[7]; + + // + // initial rounds 1 to 16 + // + + IROUND(0); + IROUND(1); + IROUND(2); + IROUND(3); + IROUND(4); + IROUND(5); + IROUND(6); + IROUND(7); + IROUND(8); + IROUND(9); + IROUND(10); + IROUND(11); + IROUND(12); + IROUND(13); + IROUND(14); + IROUND(15); + + + // + // rounds 16 to 64. + // + for (int iterCount = 1; iterCount < (64/16); iterCount++) + { + const int roundBase = iterCount*16; + FROUND(0, roundBase); + FROUND(1, roundBase); + FROUND(2, roundBase); + FROUND(3, roundBase); + FROUND(4, roundBase); + FROUND(5, roundBase); + FROUND(6, roundBase); + FROUND(7, roundBase); + FROUND(8, roundBase); + FROUND(9, roundBase); + FROUND(10, roundBase); + FROUND(11, roundBase); + FROUND(12, roundBase); + FROUND(13, roundBase); + FROUND(14, roundBase); + FROUND(15, roundBase); + } + + pChain->H[0] = ah[7] + pChain->H[0]; + pChain->H[1] = ah[6] + pChain->H[1]; + pChain->H[2] = ah[5] + pChain->H[2]; + pChain->H[3] = ah[4] + pChain->H[3]; + pChain->H[4] = ah[3] + pChain->H[4]; + pChain->H[5] = ah[2] + pChain->H[5]; + pChain->H[6] = ah[1] + pChain->H[6]; + pChain->H[7] = ah[0] + pChain->H[7]; + + pbData += SYMCRYPT_SHA256_INPUT_BLOCK_SIZE; + cbData -= SYMCRYPT_SHA256_INPUT_BLOCK_SIZE; + } + + *pcbRemaining = cbData; + + // + // Wipe the variables; + // + SymCryptWipe(&Wx, uWipeSize); + SymCryptWipeKnownSize(ah, sizeof(ah)); +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 diff --git a/libs/symcrypt/lib/sha256-ymm.c b/libs/symcrypt/lib/sha256-ymm.c new 
file mode 100644 index 00000000000..78bde9e2f5b --- /dev/null +++ b/libs/symcrypt/lib/sha256-ymm.c @@ -0,0 +1,441 @@ +#include "precomp.h" + + +#if SYMCRYPT_CPU_AMD64 + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("avx2") +#endif + +extern SYMCRYPT_ALIGN_AT(256) const UINT32 SymCryptSha256K[64]; + +// Endianness transformation for 8 32-bit values in a YMM register +const SYMCRYPT_ALIGN_AT(32) UINT32 BYTE_REVERSE_32X2[8] = { + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f, + 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f, +}; + +#if SYMCRYPT_MS_VC && !defined(__clang__) +#define RORX_U32 _rorx_u32 +#define RORX_U64 _rorx_u64 +#else +// TODO: implement _rorx functions for clang +#define RORX_U32 ROR32 +#define RORX_U64 ROR64 +#endif // SYMCRYPT_MS_VC + + + +// +// For documentation on these function see FIPS 180-2 +// +// MAJ and CH are the functions Maj and Ch from the standard. +// CSIGMA0 and CSIGMA1 are the capital sigma functions. +// LSIGMA0 and LSIGMA1 are the lowercase sigma functions. 
+// +// The canonical definitions of the MAJ and CH functions are: +//#define MAJ( x, y, z ) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +//#define CH( x, y, z ) (((x) & (y)) ^ ((~(x)) & (z))) +// We use optimized versions defined below +// + +#define MAJ( x, y, z ) ((((z) | (y)) & (x) ) | ((z) & (y))) +#define CH( x, y, z ) ((((z) ^ (y)) & (x)) ^ (z)) + +#define CSIGMA0(x) (RORX_U32(x, 2) ^ RORX_U32(x, 13) ^ RORX_U32(x, 22)) +#define CSIGMA1(x) (RORX_U32(x, 6) ^ RORX_U32(x, 11) ^ RORX_U32(x, 25)) + +#define LSIGMA0( x ) (ROR32((x), 7) ^ ROR32((x), 18) ^ ((x)>> 3)) +#define LSIGMA1( x ) (ROR32((x), 17) ^ ROR32((x), 19) ^ ((x)>>10)) + +#define LSIGMA0YMM( x ) \ + _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \ + _mm256_slli_epi32(x,25) , _mm256_srli_epi32(x, 7) ),\ + _mm256_slli_epi32(x,14) ), _mm256_srli_epi32(x, 18) ),\ + _mm256_srli_epi32(x, 3) ) + +#define LSIGMA1YMM( x ) \ + _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \ + _mm256_slli_epi32(x,15) , _mm256_srli_epi32(x, 17) ),\ + _mm256_slli_epi32(x,13) ), _mm256_srli_epi32(x, 19) ),\ + _mm256_srli_epi32(x,10) ) + + + +// Initial loading of message words and endianness transformation. +// bl : The number of blocks to load, 1 <= bl <= 8. +// +// When bl < 8, the high order lanes of the YMM registers corresponding to the missing blocks are unused. +// +#define SHA256_MSG_LOAD_8BLOCKS(_bl) { \ + for (int i = 0; i < (_bl); i++) \ + { \ + Wx.ymm[i + 0] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) &pbData[i * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE + 0]), _mm256_load_si256((const __m256i*)BYTE_REVERSE_32X2)); \ + Wx.ymm[i + 8] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) &pbData[i * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE + 32]), _mm256_load_si256((const __m256i*)BYTE_REVERSE_32X2)); \ + }\ +} + +// Shuffles the initially loaded message words from multiple blocks +// so that each YMM register contains message words with the same index +// within a block (e.g. 
Wx.ymm[0] contains the first words of each block). +// +// We have to use this macro twice to transform the message blocks of 64-bytes. +// ind=0 processes the first halves (32-bytes) of message blocks and ind=1 does the second halves. +// +#define SHA256_MSG_TRANSPOSE_HALF_8BLOCKS(ind) { \ + __m256i s1, s2, s3, s4, s5, s6, s7, s8; \ + __m256i u1, u2, u3, u4, u5, u6, u7, u8; \ + s1 = _mm256_unpacklo_epi32(Wx.ymm[8 * (ind) + 0], Wx.ymm[8 * (ind) + 1]); \ + s2 = _mm256_unpacklo_epi32(Wx.ymm[8 * (ind) + 2], Wx.ymm[8 * (ind) + 3]); \ + s3 = _mm256_unpacklo_epi32(Wx.ymm[8 * (ind) + 4], Wx.ymm[8 * (ind) + 5]); \ + s4 = _mm256_unpacklo_epi32(Wx.ymm[8 * (ind) + 6], Wx.ymm[8 * (ind) + 7]); \ + s5 = _mm256_unpackhi_epi32(Wx.ymm[8 * (ind) + 0], Wx.ymm[8 * (ind) + 1]); \ + s6 = _mm256_unpackhi_epi32(Wx.ymm[8 * (ind) + 2], Wx.ymm[8 * (ind) + 3]); \ + s7 = _mm256_unpackhi_epi32(Wx.ymm[8 * (ind) + 4], Wx.ymm[8 * (ind) + 5]); \ + s8 = _mm256_unpackhi_epi32(Wx.ymm[8 * (ind) + 6], Wx.ymm[8 * (ind) + 7]); \ + u1 = _mm256_unpacklo_epi64(s1, s2); \ + u2 = _mm256_unpacklo_epi64(s3, s4); \ + u3 = _mm256_unpacklo_epi64(s5, s6); \ + u4 = _mm256_unpacklo_epi64(s7, s8); \ + u5 = _mm256_unpackhi_epi64(s1, s2); \ + u6 = _mm256_unpackhi_epi64(s3, s4); \ + u7 = _mm256_unpackhi_epi64(s5, s6); \ + u8 = _mm256_unpackhi_epi64(s7, s8); \ + Wx.ymm[8 * (ind) + 0] = _mm256_permute2x128_si256(u1, u2, 0x20); \ + Wx.ymm[8 * (ind) + 1] = _mm256_permute2x128_si256(u5, u6, 0x20); \ + Wx.ymm[8 * (ind) + 2] = _mm256_permute2x128_si256(u3, u4, 0x20); \ + Wx.ymm[8 * (ind) + 3] = _mm256_permute2x128_si256(u7, u8, 0x20); \ + Wx.ymm[8 * (ind) + 4] = _mm256_permute2x128_si256(u1, u2, 0x31); \ + Wx.ymm[8 * (ind) + 5] = _mm256_permute2x128_si256(u5, u6, 0x31); \ + Wx.ymm[8 * (ind) + 6] = _mm256_permute2x128_si256(u3, u4, 0x31); \ + Wx.ymm[8 * (ind) + 7] = _mm256_permute2x128_si256(u7, u8, 0x31); \ +} + +#define SHA256_MSG_TRANSPOSE_8BLOCKS() { \ + SHA256_MSG_TRANSPOSE_HALF_8BLOCKS(0); \ + 
SHA256_MSG_TRANSPOSE_HALF_8BLOCKS(1); \ +} + +// +// One round of message expansion, generates message word at index r ( 16 <= r < 64 ). +// +// Additionally adds the constant to the (r-16)^th message word. We cannot add the constants to +// the message words with indices greater than (r-16) since they will be used in the message expansion. +// Constants for the last 16 words are added after message expansion is completed. +// +#define SHA256_MSG_EXPAND_8BLOCKS_1ROUND(r) { \ + Wx.ymm[r] = _mm256_add_epi32(_mm256_add_epi32(_mm256_add_epi32(Wx.ymm[r - 16], Wx.ymm[r - 7]), LSIGMA0YMM(Wx.ymm[r - 15])), LSIGMA1YMM(Wx.ymm[r - 2])); \ + Wx.ymm[r - 16] = _mm256_add_epi32(Wx.ymm[r - 16], _mm256_set1_epi32(SymCryptSha256K[r - 16])); \ +} + +// Four rounds of message schedule. Generates message words for rounds r, r+1, r+2, r+3. +#define SHA256_MSG_EXPAND_8BLOCKS_4ROUNDS(r) { \ + SHA256_MSG_EXPAND_8BLOCKS_1ROUND((r) + 0); SHA256_MSG_EXPAND_8BLOCKS_1ROUND((r) + 1); SHA256_MSG_EXPAND_8BLOCKS_1ROUND((r) + 2); SHA256_MSG_EXPAND_8BLOCKS_1ROUND((r) + 3); \ +} + +// Sixteen rounds of message schedule. Generates message words for rounds r, ..., r+15. +#define SHA256_MSG_EXPAND_8BLOCKS_16ROUNDS(r) { \ + SHA256_MSG_EXPAND_8BLOCKS_4ROUNDS((r) + 0); SHA256_MSG_EXPAND_8BLOCKS_4ROUNDS((r) + 4); SHA256_MSG_EXPAND_8BLOCKS_4ROUNDS((r) + 8); SHA256_MSG_EXPAND_8BLOCKS_4ROUNDS((r) + 12); \ +} + + +// Core round function without the constant addition. Uses rorx versions of CSIGMA functions. +// +// r16 : round number mod 16. +// rb : base round number so that (rb + r16) gives the actual round number. rb = 0, 16, 32, 48. +// bl : message block index, bl = 0..7. 
+#define CROUND_8BLOCKS(r16, rb, bl) { \ + UINT32 T2 = CSIGMA0(ah[(r16+7)&7]) + MAJ(ah[(r16+7)&7], ah[(r16+6)&7], ah[(r16+5)&7]); \ + UINT32 T1 = CSIGMA1(ah[(r16+3)&7]) + CH (ah[(r16+3)&7], ah[(r16+2)&7], ah[(r16+1)&7]) + Wx.ul8[(rb) + (r16)][bl];\ + ah[(r16+4)&7] += T1 + ah[ r16 &7]; \ + ah[ r16 &7] += T1 + T2; \ +} + +// +// Core round function for single message block processing +// r16 : round number mod 16 +// r : round number, r = 0..63 +// +#define CROUND( r16, r ) { \ + ah[ r16 &7] += CSIGMA1(ah[(r16+3)&7]) + CH(ah[(r16+3)&7], ah[(r16+2)&7], ah[(r16+1)&7]) + SymCryptSha256K[r] + Wt;\ + ah[(r16+4)&7] += ah[r16 &7];\ + ah[ r16 &7] += CSIGMA0(ah[(r16+7)&7]) + MAJ(ah[(r16+7)&7], ah[(r16+6)&7], ah[(r16+5)&7]);\ +} + + + +// +// Initial round that reads the message. +// r is the round number 0..15 +// +#define IROUND( r ) { \ + Wt = SYMCRYPT_LOAD_MSBFIRST32( &pbData[ 4*r ] );\ + Wx.ul[r] = Wt; \ + CROUND(r,r);\ +} + +// +// Subsequent rounds. +// r16 is the round number mod 16. rb is the round number minus r16. +// +#define FROUND(r16, rb) { \ + Wt = LSIGMA1( Wx.ul[(r16-2) & 15] ) + Wx.ul[(r16-7) & 15] + \ + LSIGMA0( Wx.ul[(r16-15) & 15]) + Wx.ul[r16 & 15]; \ + Wx.ul[r16] = Wt; \ + CROUND( r16, r16+rb ); \ +} + +// Constant addition and round processing for rounds = 48..63, must be called twice. +// This macro is not used at the moment but kept here for completeness. The implementation using +// this macro turns out to be slower compared to the existing one. 
+#define SHA256_8BLOCKS_FINAL_ROUNDS_8X(rnd) { \ + Wx.ymm[rnd + 0] = _mm256_add_epi32(Wx.ymm[rnd + 0], _mm256_set1_epi32(SymCryptSha256K[rnd + 0])); \ + Wx.ymm[rnd + 1] = _mm256_add_epi32(Wx.ymm[rnd + 1], _mm256_set1_epi32(SymCryptSha256K[rnd + 1])); \ + Wx.ymm[rnd + 2] = _mm256_add_epi32(Wx.ymm[rnd + 2], _mm256_set1_epi32(SymCryptSha256K[rnd + 2])); \ + Wx.ymm[rnd + 3] = _mm256_add_epi32(Wx.ymm[rnd + 3], _mm256_set1_epi32(SymCryptSha256K[rnd + 3])); \ + CROUND_8BLOCKS(0, rnd, 0); \ + CROUND_8BLOCKS(1, rnd, 0); \ + CROUND_8BLOCKS(2, rnd, 0); \ + CROUND_8BLOCKS(3, rnd, 0); \ + Wx.ymm[rnd + 4] = _mm256_add_epi32(Wx.ymm[rnd + 4], _mm256_set1_epi32(SymCryptSha256K[rnd + 4])); \ + Wx.ymm[rnd + 5] = _mm256_add_epi32(Wx.ymm[rnd + 5], _mm256_set1_epi32(SymCryptSha256K[rnd + 5])); \ + Wx.ymm[rnd + 6] = _mm256_add_epi32(Wx.ymm[rnd + 6], _mm256_set1_epi32(SymCryptSha256K[rnd + 6])); \ + Wx.ymm[rnd + 7] = _mm256_add_epi32(Wx.ymm[rnd + 7], _mm256_set1_epi32(SymCryptSha256K[rnd + 7])); \ + CROUND_8BLOCKS(4, rnd, 0); \ + CROUND_8BLOCKS(5, rnd, 0); \ + CROUND_8BLOCKS(6, rnd, 0); \ + CROUND_8BLOCKS(7, rnd, 0); \ +} + +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_ymm_8blocks( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE* pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T* pcbRemaining) +{ + + SYMCRYPT_ALIGN_AT(32) union { UINT32 ul[16]; UINT32 ul8[64][8]; __m256i ymm[64]; } Wx; + SYMCRYPT_ALIGN UINT32 ah[8]; + UINT32 Wt; + UINT32 uWipeSize = (cbData >= (5 * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE)) ? (64 * 8 * sizeof(UINT32)) : (16 * sizeof(UINT32)); + + + _mm256_zeroupper(); + + while (cbData >= (5 * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE)) + { + // If we have 8 or more blocks then process 8, else process whatever is left. + SIZE_T numBlocks = (cbData >= 8 * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE) ? 
8 : (cbData / SYMCRYPT_SHA256_INPUT_BLOCK_SIZE); + + SHA256_MSG_LOAD_8BLOCKS(numBlocks); + SHA256_MSG_TRANSPOSE_8BLOCKS(); + + // Process the first block together with message expansion. + // For the last 16 rounds we don't expand the message, instead just + // add the round constants. + { + ah[7] = pChain->H[0]; + ah[6] = pChain->H[1]; + ah[5] = pChain->H[2]; + ah[4] = pChain->H[3]; + ah[3] = pChain->H[4]; + ah[2] = pChain->H[5]; + ah[1] = pChain->H[6]; + ah[0] = pChain->H[7]; + + for (int r = 0; r < 64; r += 8) + { + if (r < 48) + { + SHA256_MSG_EXPAND_8BLOCKS_4ROUNDS(r + 16); + } + else + { + Wx.ymm[r + 0] = _mm256_add_epi32(Wx.ymm[r + 0], _mm256_set1_epi32(SymCryptSha256K[r + 0])); + Wx.ymm[r + 1] = _mm256_add_epi32(Wx.ymm[r + 1], _mm256_set1_epi32(SymCryptSha256K[r + 1])); + Wx.ymm[r + 2] = _mm256_add_epi32(Wx.ymm[r + 2], _mm256_set1_epi32(SymCryptSha256K[r + 2])); + Wx.ymm[r + 3] = _mm256_add_epi32(Wx.ymm[r + 3], _mm256_set1_epi32(SymCryptSha256K[r + 3])); + } + + CROUND_8BLOCKS(0, r, 0); + CROUND_8BLOCKS(1, r, 0); + CROUND_8BLOCKS(2, r, 0); + CROUND_8BLOCKS(3, r, 0); + + if (r < 48) + { + SHA256_MSG_EXPAND_8BLOCKS_4ROUNDS(r + 20); + } + else + { + Wx.ymm[r + 4] = _mm256_add_epi32(Wx.ymm[r + 4], _mm256_set1_epi32(SymCryptSha256K[r + 4])); + Wx.ymm[r + 5] = _mm256_add_epi32(Wx.ymm[r + 5], _mm256_set1_epi32(SymCryptSha256K[r + 5])); + Wx.ymm[r + 6] = _mm256_add_epi32(Wx.ymm[r + 6], _mm256_set1_epi32(SymCryptSha256K[r + 6])); + Wx.ymm[r + 7] = _mm256_add_epi32(Wx.ymm[r + 7], _mm256_set1_epi32(SymCryptSha256K[r + 7])); + } + + CROUND_8BLOCKS(4, r, 0); + CROUND_8BLOCKS(5, r, 0); + CROUND_8BLOCKS(6, r, 0); + CROUND_8BLOCKS(7, r, 0); + } + + // Alternative version where the loop above goes up to round=48 and + // the remaining 16 rounds are processed here. Despite the conditional logic, + // the above version is faster compared to the commented out one. 
+ //SHA256_8BLOCKS_FINAL_ROUNDS_8X(48); + //SHA256_8BLOCKS_FINAL_ROUNDS_8X(56); + + pChain->H[0] = ah[7] + pChain->H[0]; + pChain->H[1] = ah[6] + pChain->H[1]; + pChain->H[2] = ah[5] + pChain->H[2]; + pChain->H[3] = ah[4] + pChain->H[3]; + pChain->H[4] = ah[3] + pChain->H[4]; + pChain->H[5] = ah[2] + pChain->H[5]; + pChain->H[6] = ah[1] + pChain->H[6]; + pChain->H[7] = ah[0] + pChain->H[7]; + } + + + for (int bl = 1; bl < numBlocks; bl++) + { + ah[7] = pChain->H[0]; + ah[6] = pChain->H[1]; + ah[5] = pChain->H[2]; + ah[4] = pChain->H[3]; + ah[3] = pChain->H[4]; + ah[2] = pChain->H[5]; + ah[1] = pChain->H[6]; + ah[0] = pChain->H[7]; + + for (int iterCount=0; iterCount<(64/16); iterCount++) + { + const int roundBase = iterCount*16; + CROUND_8BLOCKS( 0, roundBase, bl); + CROUND_8BLOCKS( 1, roundBase, bl); + CROUND_8BLOCKS( 2, roundBase, bl); + CROUND_8BLOCKS( 3, roundBase, bl); + CROUND_8BLOCKS( 4, roundBase, bl); + CROUND_8BLOCKS( 5, roundBase, bl); + CROUND_8BLOCKS( 6, roundBase, bl); + CROUND_8BLOCKS( 7, roundBase, bl); + CROUND_8BLOCKS( 8, roundBase, bl); + CROUND_8BLOCKS( 9, roundBase, bl); + CROUND_8BLOCKS(10, roundBase, bl); + CROUND_8BLOCKS(11, roundBase, bl); + CROUND_8BLOCKS(12, roundBase, bl); + CROUND_8BLOCKS(13, roundBase, bl); + CROUND_8BLOCKS(14, roundBase, bl); + CROUND_8BLOCKS(15, roundBase, bl); + } + + pChain->H[0] = ah[7] + pChain->H[0]; + pChain->H[1] = ah[6] + pChain->H[1]; + pChain->H[2] = ah[5] + pChain->H[2]; + pChain->H[3] = ah[4] + pChain->H[3]; + pChain->H[4] = ah[3] + pChain->H[4]; + pChain->H[5] = ah[2] + pChain->H[5]; + pChain->H[6] = ah[1] + pChain->H[6]; + pChain->H[7] = ah[0] + pChain->H[7]; + } + + pbData += (numBlocks * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE); + cbData -= (numBlocks * SYMCRYPT_SHA256_INPUT_BLOCK_SIZE); + + } + + _mm256_zeroupper(); + + + while (cbData >= SYMCRYPT_SHA256_INPUT_BLOCK_SIZE) + { + ah[7] = pChain->H[0]; + ah[6] = pChain->H[1]; + ah[5] = pChain->H[2]; + ah[4] = pChain->H[3]; + ah[3] = pChain->H[4]; + ah[2] 
= pChain->H[5]; + ah[1] = pChain->H[6]; + ah[0] = pChain->H[7]; + + // + // initial rounds 1 to 16 + // + + IROUND(0); + IROUND(1); + IROUND(2); + IROUND(3); + IROUND(4); + IROUND(5); + IROUND(6); + IROUND(7); + IROUND(8); + IROUND(9); + IROUND(10); + IROUND(11); + IROUND(12); + IROUND(13); + IROUND(14); + IROUND(15); + + + // + // rounds 16 to 64. + // + for (int iterCount=1; iterCount<(64/16); iterCount++) + { + const int roundBase = iterCount*16; + FROUND( 0, roundBase); + FROUND( 1, roundBase); + FROUND( 2, roundBase); + FROUND( 3, roundBase); + FROUND( 4, roundBase); + FROUND( 5, roundBase); + FROUND( 6, roundBase); + FROUND( 7, roundBase); + FROUND( 8, roundBase); + FROUND( 9, roundBase); + FROUND(10, roundBase); + FROUND(11, roundBase); + FROUND(12, roundBase); + FROUND(13, roundBase); + FROUND(14, roundBase); + FROUND(15, roundBase); + } + + pChain->H[0] = ah[7] + pChain->H[0]; + pChain->H[1] = ah[6] + pChain->H[1]; + pChain->H[2] = ah[5] + pChain->H[2]; + pChain->H[3] = ah[4] + pChain->H[3]; + pChain->H[4] = ah[3] + pChain->H[4]; + pChain->H[5] = ah[2] + pChain->H[5]; + pChain->H[6] = ah[1] + pChain->H[6]; + pChain->H[7] = ah[0] + pChain->H[7]; + + pbData += SYMCRYPT_SHA256_INPUT_BLOCK_SIZE; + cbData -= SYMCRYPT_SHA256_INPUT_BLOCK_SIZE; + } + + *pcbRemaining = cbData; + + // + // Wipe the variables; + // + SymCryptWipe(&Wx, uWipeSize); + SymCryptWipeKnownSize(ah, sizeof(ah)); + SYMCRYPT_FORCE_WRITE32(&Wt, 0); +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // SYMCRYPT_CPU_AMD64 diff --git a/libs/symcrypt/lib/sha256.c b/libs/symcrypt/lib/sha256.c new file mode 100644 index 00000000000..975ead6d18c --- /dev/null +++ b/libs/symcrypt/lib/sha256.c @@ -0,0 +1,1884 @@ +// +// Sha256.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +// +// This module contains the routines to implement SHA2-256 from FIPS 180-2 +// +// This revised implementation is based on the older one in RSA32LIB by Scott Field from 2001 +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. +// + +const SYMCRYPT_HASH SymCryptSha224Algorithm_default = { + &SymCryptSha224Init, + &SymCryptSha224Append, + &SymCryptSha224Result, + &SymCryptSha256AppendBlocks, + &SymCryptSha224StateCopy, + sizeof( SYMCRYPT_SHA224_STATE ), + SYMCRYPT_SHA224_RESULT_SIZE, + SYMCRYPT_SHA224_INPUT_BLOCK_SIZE, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_SHA224_STATE, chain ), + SYMCRYPT_FIELD_SIZE( SYMCRYPT_SHA224_STATE, chain ), +}; + +const SYMCRYPT_HASH SymCryptSha256Algorithm_default = { + &SymCryptSha256Init, + &SymCryptSha256Append, + &SymCryptSha256Result, + &SymCryptSha256AppendBlocks, + &SymCryptSha256StateCopy, + sizeof( SYMCRYPT_SHA256_STATE ), + SYMCRYPT_SHA256_RESULT_SIZE, + SYMCRYPT_SHA256_INPUT_BLOCK_SIZE, + SYMCRYPT_FIELD_OFFSET( SYMCRYPT_SHA256_STATE, chain ), + SYMCRYPT_FIELD_SIZE( SYMCRYPT_SHA256_STATE, chain ), +}; + +const PCSYMCRYPT_HASH SymCryptSha224Algorithm = &SymCryptSha224Algorithm_default; +const PCSYMCRYPT_HASH SymCryptSha256Algorithm = &SymCryptSha256Algorithm_default; + +// +// SHA-256 uses 64 magic constants of 32 bits each. These are +// referred to as K^{256}_i for i=0...63 by FIPS 180-2. +// This array is also used by the parallel SHA256 implementation +// For performance we align to 256 bytes, which gives optimal cache alignment. 
+// +SYMCRYPT_ALIGN_AT( 256 ) const UINT32 SymCryptSha256K[64] = { + 0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, + 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, + 0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL, + 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL, + 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, + 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, + 0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, + 0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL, + 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL, + 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, + 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, + 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, + 0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL, + 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL, + 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, + 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL +}; + + +// +// Initial state +// +static const UINT32 sha224InitialState[8] = { + 0xc1059ed8UL, + 0x367cd507UL, + 0x3070dd17UL, + 0xf70e5939UL, + 0xffc00b31UL, + 0x68581511UL, + 0x64f98fa7UL, + 0xbefa4fa4UL, +}; + +static const UINT32 sha256InitialState[8] = { + 0x6a09e667UL, + 0xbb67ae85UL, + 0x3c6ef372UL, + 0xa54ff53aUL, + 0x510e527fUL, + 0x9b05688cUL, + 0x1f83d9abUL, + 0x5be0cd19UL, +}; + +// +// SymCryptSha224 +// +#define ALG SHA224 +#define Alg Sha224 +#include "hash_pattern.c" +#undef ALG +#undef Alg + +// +// SymCryptSha256 +// +#define ALG SHA256 +#define Alg Sha256 +#include "hash_pattern.c" +#undef ALG +#undef Alg + + + +// +// SymCryptSha256Init +// +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptSha256Init( _Out_ PSYMCRYPT_SHA256_STATE pState ) +{ + SYMCRYPT_SET_MAGIC( pState ); + + pState->dataLengthL = 0; + //pState->dataLengthH = 0; // not used + pState->bytesInBuffer = 0; + + memcpy( &pState->chain.H[0], &sha256InitialState[0], sizeof( sha256InitialState ) ); + + // + // 
There is no need to initialize the buffer part of the state as that will be + // filled before it is used. + // +} + + +// +// SymCryptSha224Init +// +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptSha224Init( _Out_ PSYMCRYPT_SHA224_STATE pState ) +{ + SYMCRYPT_SET_MAGIC( pState ); + + pState->dataLengthL = 0; + //pState->dataLengthH = 0; // not used + pState->bytesInBuffer = 0; + + memcpy( &pState->chain.H[0], &sha224InitialState[0], sizeof( sha224InitialState ) ); + + // + // There is no need to initialize the buffer part of the state as that will be + // filled before it is used. + // +} + + +// +// SymCryptSha256Append +// +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptSha256Append( + _Inout_ PSYMCRYPT_SHA256_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + UINT32 bytesInBuffer; + UINT32 freeInBuffer; + SIZE_T tmp; + + SYMCRYPT_CHECK_MAGIC( pState ); + + pState->dataLengthL += cbData; // dataLengthH is not used... + + bytesInBuffer = pState->bytesInBuffer; + + // + // If previous data in buffer, buffer new input and transform if possible. + // + if( bytesInBuffer > 0 ) + { + SYMCRYPT_ASSERT( SYMCRYPT_SHA256_INPUT_BLOCK_SIZE > bytesInBuffer ); + + freeInBuffer = SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - bytesInBuffer; + if( cbData < freeInBuffer ) + { + // + // All the data will fit in the buffer. + // We don't do anything here. + // As cbData < inputBlockSize the bulk data processing is skipped, + // and the data will be copied to the buffer at the end + // of this code. 
+ } else { + // + // Enough data to fill the whole buffer & process it + // + memcpy(&pState->buffer[bytesInBuffer], pbData, freeInBuffer); + pbData += freeInBuffer; + cbData -= freeInBuffer; + SymCryptSha256AppendBlocks( &pState->chain, &pState->buffer[0], SYMCRYPT_SHA256_INPUT_BLOCK_SIZE, &tmp ); + + bytesInBuffer = 0; + } + } + + // + // Internal buffer is empty; process all remaining whole blocks in the input + // + if( cbData >= SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ) + { + SymCryptSha256AppendBlocks( &pState->chain, pbData, cbData, &tmp ); + SYMCRYPT_ASSERT( tmp < SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ); + pbData += cbData - tmp; + cbData = tmp; + } + + SYMCRYPT_ASSERT( cbData < SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ); + + // + // buffer remaining input if necessary. + // + if( cbData > 0 ) + { + memcpy( &pState->buffer[bytesInBuffer], pbData, cbData ); + bytesInBuffer += (UINT32) cbData; + } + + pState->bytesInBuffer = bytesInBuffer; +} + + +// +// SymCryptSha224Append +// +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptSha224Append( + _Inout_ PSYMCRYPT_SHA224_STATE pState, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData ) +{ + SymCryptSha256Append( (PSYMCRYPT_SHA256_STATE)pState, pbData, cbData ); +} + + +// +// SymCryptSha256Result +// +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptSha256Result( + _Inout_ PSYMCRYPT_SHA256_STATE pState, + _Out_writes_( SYMCRYPT_SHA256_RESULT_SIZE ) PBYTE pbResult ) +{ + // + // We don't use the common padding code as that is slower, and SHA-256 is very frequently used in + // performance-sensitive areas. + // + UINT32 bytesInBuffer; + SIZE_T tmp; + + SYMCRYPT_CHECK_MAGIC( pState ); + + bytesInBuffer = pState->bytesInBuffer; + + // + // The buffer is never completely full, so we can always put the first + // padding byte in. + // + pState->buffer[bytesInBuffer++] = 0x80; + + if( bytesInBuffer > 64-8 ) { + // + // No room for the rest of the padding. 
Pad with zeroes & process block + // bytesInBuffer is at most 64, so we do not have an integer underflow + // + SymCryptWipe( &pState->buffer[bytesInBuffer], 64-bytesInBuffer ); + SymCryptSha256AppendBlocks( &pState->chain, pState->buffer, 64, &tmp ); + bytesInBuffer = 0; + } + + // + // Set rest of padding + // At this point bytesInBuffer <= 64-8, so we don't have an underflow + // We wipe to the end of the buffer as it is 16-aligned, + // and it is faster to wipe to an aligned point + // + SymCryptWipe( &pState->buffer[bytesInBuffer], 64-bytesInBuffer ); + SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[64-8], pState->dataLengthL * 8 ); + + // + // Process the final block + // + SymCryptSha256AppendBlocks( &pState->chain, pState->buffer, 64, &tmp ); + + // + // Write the output in the correct byte order + // + SymCryptUint32ToMsbFirst( &pState->chain.H[0], pbResult, 8 ); + + // + // Wipe & re-initialize + // We have to wipe the whole state because the Init call + // might be optimized away by a smart compiler. 
+ // + SymCryptWipeKnownSize( pState, sizeof( *pState ) ); + + memcpy( &pState->chain.H[0], &sha256InitialState[0], sizeof( sha256InitialState ) ); + SYMCRYPT_SET_MAGIC( pState ); +} + + +// +// SymCryptSha224Result +// +SYMCRYPT_NOINLINE +VOID +SYMCRYPT_CALL +SymCryptSha224Result( + _Inout_ PSYMCRYPT_SHA224_STATE pState, + _Out_writes_( SYMCRYPT_SHA224_RESULT_SIZE ) PBYTE pbResult ) +{ + SYMCRYPT_ALIGN BYTE sha256Result[SYMCRYPT_SHA256_RESULT_SIZE]; // Buffer for SHA-256 output + + // + // The SHA-3224 result is the first 28 bytes of the SHA-256 result of our state + // + SymCryptSha256Result( (PSYMCRYPT_SHA256_STATE)pState, sha256Result ); + memcpy( pbResult, sha256Result, SYMCRYPT_SHA224_RESULT_SIZE ); + + // + // The buffer was already wiped by the SymCryptSha256Result function, we + // just have to re-initialize for SHA-224 + // + SymCryptSha224Init( pState ); + + SymCryptWipeKnownSize( sha256Result, sizeof( sha256Result ) ); +} + + +VOID +SYMCRYPT_CALL +SymCryptSha256StateExportCore( + _In_ PCSYMCRYPT_SHA256_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA256_STATE_EXPORT_SIZE ) PBYTE pbBlob, + _In_ UINT32 type ) +{ + SYMCRYPT_ALIGN SYMCRYPT_SHA256_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. + C_ASSERT( sizeof( blob ) == SYMCRYPT_SHA256_STATE_EXPORT_SIZE ); + + SYMCRYPT_CHECK_MAGIC( pState ); + + SymCryptWipeKnownSize( &blob, sizeof( blob ) ); // wipe to avoid any data leakage + + blob.header.magic = SYMCRYPT_BLOB_MAGIC; + blob.header.size = SYMCRYPT_SHA256_STATE_EXPORT_SIZE; + blob.header.type = type; + + // + // Copy the relevant data. Buffer will be 0-padded. 
+ // + + SymCryptUint32ToMsbFirst( &pState->chain.H[0], &blob.chain[0], 8 ); + blob.dataLength = pState->dataLengthL; + memcpy( &blob.buffer[0], &pState->buffer[0], blob.dataLength & 0x3f ); + + SYMCRYPT_ASSERT( (PCBYTE) &blob + sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ) == (PCBYTE) &blob.trailer ); + SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), &blob.trailer.checksum[0] ); + + memcpy( pbBlob, &blob, sizeof( blob ) ); + +//cleanup: + SymCryptWipeKnownSize( &blob, sizeof( blob ) ); + return; +} + + +VOID +SYMCRYPT_CALL +SymCryptSha256StateExport( + _In_ PCSYMCRYPT_SHA256_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA256_STATE_EXPORT_SIZE ) PBYTE pbBlob) +{ + SymCryptSha256StateExportCore( pState, pbBlob, SymCryptBlobTypeSha256State ); +} + + +VOID +SYMCRYPT_CALL +SymCryptSha224StateExport( + _In_ PCSYMCRYPT_SHA224_STATE pState, + _Out_writes_bytes_( SYMCRYPT_SHA256_STATE_EXPORT_SIZE ) PBYTE pbBlob) +{ + SymCryptSha256StateExportCore( (PSYMCRYPT_SHA256_STATE)pState, pbBlob, SymCryptBlobTypeSha224State ); +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha256StateImportCore( + _Out_ PSYMCRYPT_SHA256_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA256_STATE_EXPORT_SIZE) PCBYTE pbBlob, + _In_ UINT32 type ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_ALIGN SYMCRYPT_SHA256_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. 
+ BYTE checksum[8]; + + C_ASSERT( sizeof( blob ) == SYMCRYPT_SHA256_STATE_EXPORT_SIZE ); + memcpy( &blob, pbBlob, sizeof( blob ) ); + + if( blob.header.magic != SYMCRYPT_BLOB_MAGIC || + blob.header.size != SYMCRYPT_SHA256_STATE_EXPORT_SIZE || + blob.header.type != type ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), checksum ); + if( memcmp( checksum, &blob.trailer.checksum[0], 8 ) != 0 ) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + SymCryptMsbFirstToUint32( &blob.chain[0], &pState->chain.H[0], 8 ); + pState->dataLengthL = blob.dataLength; + pState->bytesInBuffer = blob.dataLength & 0x3f; + memcpy( &pState->buffer[0], &blob.buffer[0], pState->bytesInBuffer ); + + SYMCRYPT_SET_MAGIC( pState ); + +cleanup: + SymCryptWipeKnownSize( &blob, sizeof(blob) ); + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha256StateImport( + _Out_ PSYMCRYPT_SHA256_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA256_STATE_EXPORT_SIZE) PCBYTE pbBlob ) +{ + return SymCryptSha256StateImportCore( pState, pbBlob, SymCryptBlobTypeSha256State ); +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha224StateImport( + _Out_ PSYMCRYPT_SHA224_STATE pState, + _In_reads_bytes_( SYMCRYPT_SHA224_STATE_EXPORT_SIZE) PCBYTE pbBlob ) +{ + return SymCryptSha256StateImportCore( (PSYMCRYPT_SHA256_STATE)pState, pbBlob, SymCryptBlobTypeSha224State ); +} + + + +// +// Simple test vector for FIPS module testing +// + +const BYTE SymCryptSha256KATAnswer[ 32 ] = { + 0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, + 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae, 0x22, 0x23, + 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, + 0xb4, 0x10, 0xff, 0x61, 0xf2, 0x00, 0x15, 0xad, + } ; + +VOID +SYMCRYPT_CALL +SymCryptSha256Selftest(void) +{ + BYTE result[SYMCRYPT_SHA256_RESULT_SIZE]; + + SymCryptSha256( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result ); + + 
SymCryptInjectError( result, sizeof( result ) ); + + if( memcmp( result, SymCryptSha256KATAnswer, sizeof( result ) ) != 0 ) { + SymCryptFatal( 'SH25' ); + } +} + +// +// Simple test vector for FIPS module testing +// + +const BYTE SymCryptSha224KATAnswer[ 28 ] = { + 0x23, 0x09, 0x7d, 0x22, 0x34, 0x05, 0xd8, 0x22, + 0x86, 0x42, 0xa4, 0x77, 0xbd, 0xa2, 0x55, 0xb3, + 0x2a, 0xad, 0xbc, 0xe4, 0xbd, 0xa0, 0xb3, 0xf7, + 0xe3, 0x6c, 0x9d, 0xa7, + } ; + +VOID +SYMCRYPT_CALL +SymCryptSha224Selftest(void) +{ + BYTE result[SYMCRYPT_SHA224_RESULT_SIZE]; + + SymCryptSha224( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result ); + + SymCryptInjectError( result, sizeof( result ) ); + + if( memcmp( result, SymCryptSha224KATAnswer, sizeof( result ) ) != 0 ) { + SymCryptFatal( 'SH22' ); + } +} + + + +// +// Below are multiple implementations of the SymCryptSha256AppendBlocks function, +// with a compile-time switch about which one to use. +// We keep the multiple implementations here for future reference; +// as CPU architectures evolve we might want to switch to one of the +// other implementations. +// All implementations here have been tested, but some lack production hardening. +// + +// +// Enable frame pointer omission to free up an extra register on X86. +// +#if SYMCRYPT_CPU_X86 && SYMCRYPT_MS_VC && !defined(__clang__) +#pragma optimize( "y", on ) +#endif + +// +// For documentation on these function see FIPS 180-2 +// +// MAJ and CH are the functions Maj and Ch from the standard. +// CSIGMA0 and CSIGMA1 are the capital sigma functions. +// LSIGMA0 and LSIGMA1 are the lowercase sigma functions. 
//
// The canonical definitions of the MAJ and CH functions are:
//#define MAJ( x, y, z )    (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
//#define CH( x, y, z )  (((x) & (y)) ^ ((~(x)) & (z)))
// We use optimized versions defined below
// (MAJ: 4 bitwise operations instead of 5; CH: 3 instead of 4).
//
#define MAJ( x, y, z )  ((((z) | (y)) & (x) ) | ((z) & (y)))
#define CH( x, y, z )  ((((z) ^ (y)) & (x)) ^ (z))

//
// The four Sigma functions
//

//
// We have two versions of the rotate-and-xor functions.
// one is just a macro that does the rotations and xors.
// This works well on ARM
// For Intel/AMD we have one where we use the rotated value
// from one intermediate result to derive the next rotated
// value from. This removes one register copy from the
// code stream.
//
// In practice, our compiler doesn't take advantage of the
// reduction in the # operations required, and inserts a
// bunch of extra register copies anyway.
// It actually hurts on AMD64.
//
// This should be re-tuned for every release to get the best overall
// SHA-256 performance.
// At the moment we get an improvement from 19.76 c/B to 19.40 c/B on a Core 2 core.
// We should probably tune this to the Atom CPU.
//
#if SYMCRYPT_CPU_X86
#define USE_CSIGMA0_MULTIROT 1
#define USE_CSIGMA1_MULTIROT 0
#define USE_LSIGMA0_MULTIROT 0
#define USE_LSIGMA1_MULTIROT 0

#else
//
// On ARM we have no reason to believe this helps at all.
// on AMD64 it slows our code down.
//
#define USE_CSIGMA0_MULTIROT 0
#define USE_CSIGMA1_MULTIROT 0
#define USE_LSIGMA0_MULTIROT 0
#define USE_LSIGMA1_MULTIROT 0
#endif

//
// In each *_MULTIROT variant the rotations are chained, so the rotate counts
// accumulate to the same totals as in the plain macro next to it.
//

#if USE_CSIGMA0_MULTIROT
FORCEINLINE
UINT32
CSIGMA0( UINT32 x )
{
    UINT32 res;
    x = ROR32( x, 2 );          // total rotation  2
    res = x;
    x = ROR32( x, 11 );         // total rotation 13
    res ^= x;
    x = ROR32( x, 9 );          // total rotation 22
    res ^= x;
    return res;
}
#else
#define CSIGMA0( x ) (ROR32((x),  2) ^ ROR32((x), 13) ^ ROR32((x), 22))
#endif

#if USE_CSIGMA1_MULTIROT
FORCEINLINE
UINT32
CSIGMA1( UINT32 x )
{
    UINT32 res;
    x = ROR32( x, 6 );          // total rotation  6
    res = x;
    x = ROR32( x, 5 );          // total rotation 11
    res ^= x;
    x = ROR32( x, 14 );         // total rotation 25
    res ^= x;
    return res;
}
#else
#define CSIGMA1( x ) (ROR32((x),  6) ^ ROR32((x), 11) ^ ROR32((x), 25))
#endif

#if USE_LSIGMA0_MULTIROT
FORCEINLINE
UINT32
LSIGMA0( UINT32 x )
{
    UINT32 res;
    res = x >> 3;               // the shift must use the unrotated input
    x = ROR32( x, 7 );          // total rotation  7
    res ^= x;
    x = ROR32( x, 11 );         // total rotation 18
    res ^= x;
    return res;
}
#else
#define LSIGMA0( x ) (ROR32((x),  7) ^ ROR32((x), 18) ^ ((x)>> 3))
#endif

#if USE_LSIGMA1_MULTIROT
FORCEINLINE
UINT32
LSIGMA1( UINT32 x )
{
    UINT32 res;
    res = x >> 10;              // the shift must use the unrotated input
    x = ROR32( x, 17 );         // total rotation 17
    res ^= x;
    x = ROR32( x, 2 );          // total rotation 19
    res ^= x;
    return res;
}
#else
#define LSIGMA1( x ) (ROR32((x), 17) ^ ROR32((x), 19) ^ ((x)>>10))
#endif


//
// The values a-h are stored in an array called ah.
// We have unrolled the loop 16 times. This makes both the indices into
// the ah array constant, and it makes the message addressing constant.
// This provides a significant speed improvement, at the cost of making
// the main loop about 4 kB in code.
//
// The earlier implementation had the loop unrolled 8 times, and is
// around 10 cycles/byte slower. If loading the code from disk takes
// 100 cycles/byte, then we break even once you have hashed 20 kB.
// This is a worthwhile tradeoff as all code is codesigned with SHA-256.
//

//
// Core round macro
//
// r16 is the round number mod 16, r is the round number.
// r16 is a separate macro argument because it is always a compile-time constant
// which allows much better optimizations of the memory accesses.
//
// ah[ r16   &7] = h
// ah[(r16+1)&7] = g;
// ah[(r16+2)&7] = f;
// ah[(r16+3)&7] = e;
// ah[(r16+4)&7] = d;
// ah[(r16+5)&7] = c;
// ah[(r16+6)&7] = b;
// ah[(r16+7)&7] = a;
//
// After that incrementing the round number will automatically map a->b, b->c, etc.
//
// The core round, after the message word has been computed for this round and put in Wt.
// r16 is the round number modulo 16. (Static after loop unrolling)
// r is the round number (dynamic, which is why we don't use (r&0xf) for r16)
// In more readable form this macro does the following:
//      h += CSIGMA1( e ) + CH( e, f, g ) + K[round] + W[round];
//      d += h;
//      h += CSIGMA0( a ) + MAJ( a, b, c );
//
// The macro expects ah[] and Wt to be in scope at the point of use.
// It is wrapped in do/while(0) so that it behaves as one statement and
// must be followed by a semicolon.
//
#define CROUND( r16, r ) do { \
    ah[ (r16)   &7] += CSIGMA1(ah[((r16)+3)&7]) + CH(ah[((r16)+3)&7], ah[((r16)+2)&7], ah[((r16)+1)&7]) + SymCryptSha256K[(r)] + Wt; \
    ah[((r16)+4)&7] += ah[(r16) &7]; \
    ah[ (r16)   &7] += CSIGMA0(ah[((r16)+7)&7]) + MAJ(ah[((r16)+7)&7], ah[((r16)+6)&7], ah[((r16)+5)&7]); \
} while( 0 )

//
// Initial round that reads the message.
// r is the round number 0..15
// Loads message word r MSB-first from pbData, stores it in W[r] and runs the round.
// Expects pbData, W and Wt to be in scope at the point of use.
//
#define IROUND( r ) do { \
    Wt = SYMCRYPT_LOAD_MSBFIRST32( &pbData[ 4*(r) ] ); \
    W[(r)] = Wt; \
    CROUND( (r), (r) ); \
} while( 0 )

//
// Subsequent rounds.
// r16 is the round number mod 16. rb is the round number minus r16.
+// +#define FROUND(r16, rb) { \ + Wt = LSIGMA1( W[(r16-2) & 15] ) + W[(r16-7) & 15] + \ + LSIGMA0( W[(r16-15) & 15]) + W[r16 & 15]; \ + W[r16] = Wt; \ + CROUND( r16, r16+rb ); \ +} + +// +// UINT32 implementation 1 +// +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_ul1( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ) +{ + SYMCRYPT_ALIGN UINT32 W[16]; + SYMCRYPT_ALIGN UINT32 ah[8]; + int round; + UINT32 Wt; + + while( cbData >= 64 ) + { + ah[7] = pChain->H[0]; + ah[6] = pChain->H[1]; + ah[5] = pChain->H[2]; + ah[4] = pChain->H[3]; + ah[3] = pChain->H[4]; + ah[2] = pChain->H[5]; + ah[1] = pChain->H[6]; + ah[0] = pChain->H[7]; + + // + // initial rounds 1 to 16 + // + + IROUND( 0 ); + IROUND( 1 ); + IROUND( 2 ); + IROUND( 3 ); + IROUND( 4 ); + IROUND( 5 ); + IROUND( 6 ); + IROUND( 7 ); + IROUND( 8 ); + IROUND( 9 ); + IROUND( 10 ); + IROUND( 11 ); + IROUND( 12 ); + IROUND( 13 ); + IROUND( 14 ); + IROUND( 15 ); + + + // + // rounds 16 to 64. 
+ // + for( round=16; round<64; round += 16 ) + { + FROUND( 0, round ); + FROUND( 1, round ); + FROUND( 2, round ); + FROUND( 3, round ); + FROUND( 4, round ); + FROUND( 5, round ); + FROUND( 6, round ); + FROUND( 7, round ); + FROUND( 8, round ); + FROUND( 9, round ); + FROUND( 10, round ); + FROUND( 11, round ); + FROUND( 12, round ); + FROUND( 13, round ); + FROUND( 14, round ); + FROUND( 15, round ); + } + + pChain->H[0] = ah[7] + pChain->H[0]; + pChain->H[1] = ah[6] + pChain->H[1]; + pChain->H[2] = ah[5] + pChain->H[2]; + pChain->H[3] = ah[4] + pChain->H[3]; + pChain->H[4] = ah[3] + pChain->H[4]; + pChain->H[5] = ah[2] + pChain->H[5]; + pChain->H[6] = ah[1] + pChain->H[6]; + pChain->H[7] = ah[0] + pChain->H[7]; + + pbData += 64; + cbData -= 64; + + } + + *pcbRemaining = cbData; + + // + // Wipe the variables; + // + SymCryptWipeKnownSize( ah, sizeof( ah ) ); + SymCryptWipeKnownSize( W, sizeof( W ) ); + SYMCRYPT_FORCE_WRITE32( &Wt, 0 ); +} + +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_ul2( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ) +{ + // + // Different arrangement of the code, currently 25 c/B vs 20 c/b for the version above. + // On Atom: 50 c/B vs 41 c/B for the one above. 
+ // + SYMCRYPT_ALIGN UINT32 buf[4 + 8 + 64]; // chaining state concatenated with the expanded input block + UINT32 * W = &buf[4 + 8]; + UINT32 * ha = &buf[4]; // initial state words, in order h, g, ..., b, a + UINT32 A, B, C, D, T; + int r; + + ha[7] = pChain->H[0]; buf[3] = ha[7]; + ha[6] = pChain->H[1]; buf[2] = ha[6]; + ha[5] = pChain->H[2]; buf[1] = ha[5]; + ha[4] = pChain->H[3]; buf[0] = ha[4]; + ha[3] = pChain->H[4]; + ha[2] = pChain->H[5]; + ha[1] = pChain->H[6]; + ha[0] = pChain->H[7]; + + while( cbData >= 64 ) + { + // + // Capture the input into W[0..15] + // + for( r=0; r<16; r++ ) + { + W[r] = SYMCRYPT_LOAD_MSBFIRST32( &pbData[ 4*r ] ); + } + + // + // Expand the message + // + A = W[15]; + B = W[14]; + D = W[0]; + for( r=16; r<64; r+= 2 ) + { + // Loop invariant: A=W[r-1], B = W[r-2], D = W[r-16] + + // + // Macro for one word of message expansion. + // Invariant: + // on entry: a = W[r-1], b = W[r-2], d = W[r-16] + // on exit: W[r] computed, a = W[r-1], b = W[r], c = W[r-15] + // + #define EXPAND( a, b, c, d, r ) \ + c = W[r-15]; \ + b = d + LSIGMA1( b ) + W[r-7] + LSIGMA0( c ); \ + W[r] = b; \ + + EXPAND( A, B, C, D, r ); + EXPAND( B, A, D, C, (r+1)); + + #undef EXPAND + } + + A = ha[7]; + B = ha[6]; + C = ha[5]; + D = ha[4]; + + for( r=0; r<64; r += 4 ) + { + // + // Loop invariant: + // A, B, C, and D are the a,b,c,d values of the current state. + // W[r] is the next expanded message word to be processed. + // W[r-8 .. r-5] contain the current state words h, g, f, e. 
+ // + + // + // Macro to compute one round + // + #define DO_ROUND( a, b, c, d, t, r ) \ + t = W[r] + CSIGMA1( W[r-5] ) + W[r-8] + CH( W[r-5], W[r-6], W[r-7] ) + SymCryptSha256K[r]; \ + W[r-4] = t + d; \ + d = t + CSIGMA0( a ) + MAJ( c, b, a ); + + DO_ROUND( A, B, C, D, T, r ); + DO_ROUND( D, A, B, C, T, (r+1) ); + DO_ROUND( C, D, A, B, T, (r+2) ); + DO_ROUND( B, C, D, A, T, (r+3) ); + #undef DO_ROUND + } + + buf[3] = ha[7] = buf[3] + A; + buf[2] = ha[6] = buf[2] + B; + buf[1] = ha[5] = buf[1] + C; + buf[0] = ha[4] = buf[0] + D; + ha[3] += W[r-5]; + ha[2] += W[r-6]; + ha[1] += W[r-7]; + ha[0] += W[r-8]; + + pbData += 64; + cbData -= 64; + } + + pChain->H[0] = ha[7]; + pChain->H[1] = ha[6]; + pChain->H[2] = ha[5]; + pChain->H[3] = ha[4]; + pChain->H[4] = ha[3]; + pChain->H[5] = ha[2]; + pChain->H[6] = ha[1]; + pChain->H[7] = ha[0]; + + *pcbRemaining = cbData; + + SymCryptWipeKnownSize( buf, sizeof( buf ) ); + SYMCRYPT_FORCE_WRITE32( &A, 0 ); + SYMCRYPT_FORCE_WRITE32( &B, 0 ); + SYMCRYPT_FORCE_WRITE32( &D, 0 ); + SYMCRYPT_FORCE_WRITE32( &T, 0 ); +} + +#undef CROUND +#undef IROUND +#undef FROUND + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +// +// Don't omit frame pointer for XMM code; it isn't register-starved as much +// +#if SYMCRYPT_CPU_X86 && SYMCRYPT_MS_VC && !defined(__clang__) +#pragma optimize( "y", off ) +#endif + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("ssse3,sha"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("ssse3,sha") +#endif + +// +// Code that uses the XMM registers. +// This code is currently unused. It was written in case it would provide better performance, but +// it did not. We are retaining it in case it might be useful in a future CPU generation. 
//
// Everything from here to the matching #endif is compiled out.
//
#if 0

//
// XMM counterparts of the scalar MAJ/CH/sigma helpers.
// A 32-bit rotate right by n is built from a left shift by (32-n) and a
// right shift by n, combined with xor.
//
#define MAJXMM( x, y, z ) _mm_or_si128( _mm_and_si128( _mm_or_si128( z, y ), x ), _mm_and_si128( z, y ))
#define CHXMM( x, y, z )  _mm_xor_si128( _mm_and_si128( _mm_xor_si128( z, y ), x ), z )

#define CSIGMA0XMM( x ) \
    _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \
        _mm_slli_epi32(x,30)  , _mm_srli_epi32(x,  2) ),\
        _mm_slli_epi32(x,19) ), _mm_srli_epi32(x, 13) ),\
        _mm_slli_epi32(x,10) ), _mm_srli_epi32(x, 22) )
#define CSIGMA1XMM( x ) \
    _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \
        _mm_slli_epi32(x,26)  , _mm_srli_epi32(x,  6) ),\
        _mm_slli_epi32(x,21) ), _mm_srli_epi32(x, 11) ),\
        _mm_slli_epi32(x,7) ), _mm_srli_epi32(x, 25) )
#define LSIGMA0XMM( x ) \
    _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \
        _mm_slli_epi32(x,25)  , _mm_srli_epi32(x,  7) ),\
        _mm_slli_epi32(x,14) ), _mm_srli_epi32(x, 18) ),\
        _mm_srli_epi32(x,  3) )
#define LSIGMA1XMM( x ) \
    _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \
        _mm_slli_epi32(x,15)  , _mm_srli_epi32(x, 17) ),\
        _mm_slli_epi32(x,13) ), _mm_srli_epi32(x, 19) ),\
        _mm_srli_epi32(x,10) )

//
// XMM implementation 1
// Same structure as SymCryptSha256AppendBlocks_ul2, with every 32-bit word
// held in lane 0 of its own __m128i.
//
// NOTE(review): the buf[] wipe below is commented out, so the
// _mm_insert_epi32( ha[i], ... ) and _mm_insert_epi32( W[r], ... ) calls read
// elements that have not been written yet; only lane 0 is used afterwards.
// NOTE(review): _mm_insert_epi32 / _mm_extract_epi32 are SSE4.1 intrinsics,
// while the target pragma in this file enables "ssse3,sha" - confirm before
// re-enabling this code.
//
VOID
SYMCRYPT_CALL
SymCryptSha256AppendBlocks_xmm1(
    _Inout_                 SYMCRYPT_SHA256_CHAINING_STATE *    pChain,
    _In_reads_( cbData )    PCBYTE                              pbData,
                            SIZE_T                              cbData,
    _Out_                   SIZE_T                            * pcbRemaining )
{
    //
    // Implementation that has one value in each XMM register.
    // This is significantly slower than the _ul1 implementation
    // but can be extended to compute 4 hash blocks in parallel.
    //
    SYMCRYPT_ALIGN __m128i buf[4 + 8 + 64];      // chaining state concatenated with the expanded input block
    __m128i * W = &buf[4 + 8];
    __m128i * ha = &buf[4]; // initial state words, in order h, g, ..., b, a
    __m128i A, B, C, D, T;
    int r;

    //
    // For 1-input only; set the input buffer to zero so that we have known values in every byte
    //
    //SymCryptWipeKnownSize( buf, sizeof( buf ) );

    //
    // Copy the chaining state into the start of the buffer, order = h,g,f,e,d,c,b,a
    //
    ha[7] = _mm_insert_epi32(ha[7], pChain->H[0], 0);
    ha[6] = _mm_insert_epi32(ha[6], pChain->H[1], 0);
    ha[5] = _mm_insert_epi32(ha[5], pChain->H[2], 0);
    ha[4] = _mm_insert_epi32(ha[4], pChain->H[3], 0);
    ha[3] = _mm_insert_epi32(ha[3], pChain->H[4], 0);
    ha[2] = _mm_insert_epi32(ha[2], pChain->H[5], 0);
    ha[1] = _mm_insert_epi32(ha[1], pChain->H[6], 0);
    ha[0] = _mm_insert_epi32(ha[0], pChain->H[7], 0);

    // Saved copies of d, c, b, a for the feed-forward at the end of each block.
    buf[0] = ha[4];
    buf[1] = ha[5];
    buf[2] = ha[6];
    buf[3] = ha[7];

    while( cbData >= 64 )
    {

        //
        // Capture the input into W[0..15]
        //
        for( r=0; r<16; r++ )
        {
            W[r] = _mm_insert_epi32(W[r], SYMCRYPT_LOAD_MSBFIRST32( &pbData[ 4*r ] ), 0);
        }

        //
        // Expand the message
        //
        A = W[15];
        B = W[14];
        D = W[0];
        for( r=16; r<64; r+= 2 )
        {
            // Loop invariant: A=W[r-1], B = W[r-2], D = W[r-16]

            //
            // Macro for one word of message expansion.
            // Invariant:
            // on entry: a = W[r-1], b = W[r-2], d = W[r-16]
            // on exit:  W[r] computed, a = W[r-1], b = W[r], c = W[r-15]
            //
            #define EXPAND( a, b, c, d, r ) \
                        c = W[r-15]; \
                        b = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( d, LSIGMA1XMM( b ) ), W[r-7] ), LSIGMA0XMM( c ) ); \
                        W[r] = b; \

            EXPAND( A, B, C, D, r );
            EXPAND( B, A, D, C, (r+1));

            #undef EXPAND
        }

        A = ha[7];
        B = ha[6];
        C = ha[5];
        D = ha[4];

        for( r=0; r<64; r += 4 )
        {
            //
            // Loop invariant:
            // A, B, C, and D are the a,b,c,d values of the current state.
            // W[r] is the next expanded message word to be processed.
            // W[r-8 .. r-5] contain the current state words h, g, f, e.
            //

            //
            // Macro to compute one round
            //
            #define DO_ROUND( a, b, c, d, t, r ) \
                t = W[r]; \
                t = _mm_add_epi32( t, CSIGMA1XMM( W[r-5] ) ); \
                t = _mm_add_epi32( t, W[r-8] ); \
                t = _mm_add_epi32( t, CHXMM( W[r-5], W[r-6], W[r-7] ) ); \
                t = _mm_add_epi32( t, _mm_cvtsi32_si128( SymCryptSha256K[r] ) ); \
                W[r-4] = _mm_add_epi32( t, d ); \
                d = _mm_add_epi32( t, CSIGMA0XMM( a ) ); \
                d = _mm_add_epi32( d, MAJXMM( c, b, a ) );

            DO_ROUND( A, B, C, D, T, r );
            DO_ROUND( D, A, B, C, T, (r+1) );
            DO_ROUND( C, D, A, B, T, (r+2) );
            DO_ROUND( B, C, D, A, T, (r+3) );
            #undef DO_ROUND
        }

        // Feed-forward; r == 64 here, so W[r-5..r-8] are the final e, f, g, h.
        buf[3] = ha[7] = _mm_add_epi32( buf[3], A );
        buf[2] = ha[6] = _mm_add_epi32( buf[2], B );
        buf[1] = ha[5] = _mm_add_epi32( buf[1], C );
        buf[0] = ha[4] = _mm_add_epi32( buf[0], D );
        ha[3] = _mm_add_epi32( ha[3], W[r-5] );
        ha[2] = _mm_add_epi32( ha[2], W[r-6] );
        ha[1] = _mm_add_epi32( ha[1], W[r-7] );
        ha[0] = _mm_add_epi32( ha[0], W[r-8] );

        pbData += 64;
        cbData -= 64;
    }

    //
    // Copy the chaining state back into the hash structure
    //
    pChain->H[0] = _mm_extract_epi32(ha[7], 0);
    pChain->H[1] = _mm_extract_epi32(ha[6], 0);
    pChain->H[2] = _mm_extract_epi32(ha[5], 0);
    pChain->H[3] = _mm_extract_epi32(ha[4], 0);
    pChain->H[4] = _mm_extract_epi32(ha[3], 0);
    pChain->H[5] = _mm_extract_epi32(ha[2], 0);
    pChain->H[6] = _mm_extract_epi32(ha[1], 0);
    pChain->H[7] = _mm_extract_epi32(ha[0], 0);

    *pcbRemaining = cbData;

    SymCryptWipeKnownSize( buf, sizeof( buf ) );
    SymCryptWipeKnownSize( &A, sizeof( A ) );
    SymCryptWipeKnownSize( &B, sizeof( B ) );
    SymCryptWipeKnownSize( &C, sizeof( C ) );
    SymCryptWipeKnownSize( &D, sizeof( D ) );
    SymCryptWipeKnownSize( &T, sizeof( T ) );
}


//
// XMM implementation 2
// We use the XMM registers to compute part of the message schedule.
// The load, BSWAP, and part of the message schedule recursion are done in XMM registers.
// The rest of the work is done using integers.
//
// Core2: 0.1 c/B slower than the _ul1
// Atom: 1.0 c/B slower than _ul1 (42.34 vs 41.39 c/B)
//
VOID
SYMCRYPT_CALL
SymCryptSha256AppendBlocks_xmm2(
    _Inout_                 SYMCRYPT_SHA256_CHAINING_STATE *    pChain,
    _In_reads_( cbData )    PCBYTE                              pbData,
                            SIZE_T                              cbData,
    _Out_                   SIZE_T                            * pcbRemaining )
{
    SYMCRYPT_ALIGN union { UINT32 ul[16]; __m128i xmm[4]; } W;
    SYMCRYPT_ALIGN UINT32 ah[8];
    int round;
    UINT32 Wt;
    // Shuffle mask that reverses the byte order inside each 32-bit lane.
    const __m128i BYTE_REVERSE_32 = _mm_set_epi8( 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 );

    // ah[7..0] = a..h; unlike _ul1 this is loaded once, outside the block loop.
    ah[7] = pChain->H[0];
    ah[6] = pChain->H[1];
    ah[5] = pChain->H[2];
    ah[4] = pChain->H[3];
    ah[3] = pChain->H[4];
    ah[2] = pChain->H[5];
    ah[1] = pChain->H[6];
    ah[0] = pChain->H[7];

#define CROUND( r16, r ) {;\
    ah[ r16   &7] += CSIGMA1(ah[(r16+3)&7]) + CH(ah[(r16+3)&7], ah[(r16+2)&7], ah[(r16+1)&7]) + SymCryptSha256K[r] + Wt;\
    ah[(r16+4)&7] += ah[r16 &7];\
    ah[ r16   &7] += CSIGMA0(ah[(r16+7)&7]) + MAJ(ah[(r16+7)&7], ah[(r16+6)&7], ah[(r16+5)&7]);\
}


//
// Initial round that reads the message.
// r is the round number 0..15
//
// In this variant the message words are already in W.ul[] (see the SIMD
// loads at the top of the block loop), so the scalar load used by _ul1,
//      Wt = LOAD_MSBFIRST32( &pbData[ 4*r ] );
//      W.ul[r] = Wt;
// is not needed here.
//

#define IROUND( r ) {\
    Wt = W.ul[r];\
    CROUND(r,r);\
    }

//
// Subsequent rounds.
// r16 is the round number mod 16. rb is the round number minus r16.
// The message-schedule word has already been written to W.ul[r16] by the
// code in the round loop before FROUND is invoked.
//
#define FROUND(r16, rb) { \
    Wt = W.ul[r16];\
    CROUND( r16, r16+rb ); \
}


    while( cbData >= 64 )
    {
        //
        // The code is faster if we directly access the W.ul array, rather than the W.xmm alias.
        // I think the compiler gets more confused if you use the W.xmm values.
        // We retain them in the union to ensure alignment
        //
        _mm_store_si128( (__m128i *)&W.ul[ 0], _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *)&pbData[  0 ] ), BYTE_REVERSE_32 ));
        _mm_store_si128( (__m128i *)&W.ul[ 4], _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *)&pbData[ 16 ] ), BYTE_REVERSE_32 ));
        _mm_store_si128( (__m128i *)&W.ul[ 8], _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *)&pbData[ 32 ] ), BYTE_REVERSE_32 ));
        _mm_store_si128( (__m128i *)&W.ul[12], _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *)&pbData[ 48 ] ), BYTE_REVERSE_32 ));

        //
        // initial rounds 0 to 15
        //

        IROUND(  0 );
        IROUND(  1 );
        IROUND(  2 );
        IROUND(  3 );
        IROUND(  4 );
        IROUND(  5 );
        IROUND(  6 );
        IROUND(  7 );
        IROUND(  8 );
        IROUND(  9 );
        IROUND( 10 );
        IROUND( 11 );
        IROUND( 12 );
        IROUND( 13 );
        IROUND( 14 );
        IROUND( 15 );


        //
        // rounds 16 to 63.
        // Each group of four schedule words is computed as
        //      Tmp = LSIGMA0( W[t-15] ) + W[t-16] + W[t-7]     (four lanes in XMM)
        // and the LSIGMA1( W[t-2] ) term is then added with scalar code.
        //
        for( round=16; round<64; round += 16 )
        {
            __m128i Tmp;

            Tmp = _mm_add_epi32( _mm_add_epi32(
                    LSIGMA0XMM(_mm_loadu_si128( (__m128i *)&W.ul[1] )),
                    _mm_load_si128( (__m128i *)&W.ul[0] ) ),
                    _mm_loadu_si128( (__m128i *)&W.ul[9] ) );

            //
            // The final part of the message schedule can be done in XMM registers, but it isn't worth it.
            // The rotates in XMM take two shifts and an OR/XOR, vs one instruction in integer registers.
            // As the sigma1( W_{t-2} ) recursion component can only be computed 2 at a time
            // (because the result of the first two are the inputs to the second two)
            // you lose more than you gain by using XMM registers.
            //
            //Tmp = _mm_add_epi32( Tmp, LSIGMA1XMM( _mm_srli_si128( _mm_load_si128( (__m128i *)&W.ul[12] ), 8 ) ) );
            //Tmp = _mm_add_epi32( Tmp, LSIGMA1XMM( _mm_slli_si128( Tmp, 8 ) ) );
            //_mm_store_si128( (__m128i *)&W.ul[0], Tmp );
            //

            _mm_store_si128( (__m128i *)&W.ul[0], Tmp );
            W.ul[0] += LSIGMA1( W.ul[14] );
            W.ul[1] += LSIGMA1( W.ul[15] );
            W.ul[2] += LSIGMA1( W.ul[0] );
            W.ul[3] += LSIGMA1( W.ul[1] );

            FROUND(  0, round );
            FROUND(  1, round );
            FROUND(  2, round );
            FROUND(  3, round );

            // The alignr picks W.ul[13], [14], [15], [0]: the W[t-7] window wraps around the 16-word buffer.
            Tmp = _mm_add_epi32( _mm_add_epi32(
                    LSIGMA0XMM(_mm_loadu_si128( (__m128i *)&W.ul[5] )),
                    _mm_load_si128( (__m128i *)&W.ul[4] ) ),
                    _mm_alignr_epi8( _mm_load_si128( (__m128i *)&W.ul[0] ), _mm_load_si128( (__m128i *)&W.ul[12] ), 4) );

            _mm_store_si128( (__m128i *)&W.ul[4], Tmp );

            W.ul[4] += LSIGMA1( W.ul[2] );
            W.ul[5] += LSIGMA1( W.ul[3] );
            W.ul[6] += LSIGMA1( W.ul[4] );
            W.ul[7] += LSIGMA1( W.ul[5] );

            FROUND(  4, round );
            FROUND(  5, round );
            FROUND(  6, round );
            FROUND(  7, round );

            Tmp = _mm_add_epi32( _mm_add_epi32(
                    LSIGMA0XMM(_mm_loadu_si128( (__m128i *)&W.ul[9] )),
                    _mm_load_si128( (__m128i *)&W.ul[8] ) ),
                    _mm_loadu_si128( (__m128i *)&W.ul[1] ) );

            _mm_store_si128( (__m128i *)&W.ul[8], Tmp );
            W.ul[ 8] += LSIGMA1( W.ul[6] );
            W.ul[ 9] += LSIGMA1( W.ul[7] );
            W.ul[10] += LSIGMA1( W.ul[8] );
            W.ul[11] += LSIGMA1( W.ul[9] );

            FROUND(  8, round );
            FROUND(  9, round );
            FROUND( 10, round );
            FROUND( 11, round );


            // Here the W[t-15] window (W.ul[13], [14], [15], [0]) is the one that wraps.
            Tmp = _mm_add_epi32( _mm_add_epi32(
                    LSIGMA0XMM( _mm_alignr_epi8( _mm_load_si128( (__m128i *)&W.ul[0] ), _mm_load_si128( (__m128i *)&W.ul[12] ), 4) ),
                    _mm_load_si128( (__m128i *)&W.ul[12] ) ),
                    _mm_loadu_si128( (__m128i *)&W.ul[5] ) );

            _mm_store_si128( (__m128i *)&W.ul[12], Tmp );
            W.ul[12] += LSIGMA1( W.ul[10] );
            W.ul[13] += LSIGMA1( W.ul[11] );
            W.ul[14] += LSIGMA1( W.ul[12] );
            W.ul[15] += LSIGMA1( W.ul[13] );

            FROUND( 12, round );
            FROUND( 13, round );
            FROUND( 14, round );
            FROUND( 15, round );
        }

        // Feed-forward; the sum is also kept in ah[] as the start state of the next block.
        pChain->H[0] = ah[7] = ah[7] + pChain->H[0];
        pChain->H[1] = ah[6] = ah[6] + pChain->H[1];
        pChain->H[2] = ah[5] = ah[5] + pChain->H[2];
        pChain->H[3] = ah[4] = ah[4] + pChain->H[3];
        pChain->H[4] = ah[3] = ah[3] + pChain->H[4];
        pChain->H[5] = ah[2] = ah[2] + pChain->H[5];
        pChain->H[6] = ah[1] = ah[1] + pChain->H[6];
        pChain->H[7] = ah[0] = ah[0] + pChain->H[7];

        pbData += 64;
        cbData -= 64;

    }

    *pcbRemaining = cbData;

    //
    // Wipe the variables;
    //
    SymCryptWipeKnownSize( ah, sizeof( ah ) );
    SymCryptWipeKnownSize( &W, sizeof( W ) );
    SYMCRYPT_FORCE_WRITE32( &Wt, 0 );

#undef IROUND
#undef FROUND
#undef CROUND
}

#endif

//
// SHA-NI Implementation
//

#if SYMCRYPT_MS_VC  && !defined(__clang__)
// Intrinsic definitions included here
// until the header is updated.
// *******************************
// *******************************
// *******************************
extern __m128i _mm_sha256rnds2_epu32(__m128i, __m128i, __m128i);
extern __m128i _mm_sha256msg1_epu32(__m128i, __m128i);
extern __m128i _mm_sha256msg2_epu32(__m128i, __m128i);
// *******************************
// *******************************
// *******************************
#endif

// For the SHA-NI implementation we will utilize 128-bit XMM registers. Each
// XMM state will be denoted as (R_3, R_2, R_1, R_0), where each R_i
// is a 32-bit word and R_i refers to bits [32*i : (32*i + 31)] of the
// 128-bit XMM state.
//
// The following macro updates the state variables A,B,C,...,H of the SHA algorithms
// for 4 rounds using:
//      - The current round number t with 0<=t<= 63 and t a multiple of 4.
//      - A current message XMM state _MSG which consists of 4 32-bit words
//        ( W_(t+3), W_(t+2), W_(t+1), W_(t+0) ).
//      - Two XMM states _ABEF and _CDGH which contain the variables
//        ( A, B, E, F ) and ( C, D, G, H ) respectively.
+ +#define SHANI_UPDATE_STATE( _round, _MSG, _ABEF, _CDGH ) \ + _MSG = _mm_add_epi32( _MSG, *(__m128i *)&SymCryptSha256K[_round] ); /* Add the K_t constants to the W_t's */ \ + _CDGH = _mm_sha256rnds2_epu32( _CDGH, _ABEF, _MSG ); /* 2 rounds using SHA-NI */ \ + _MSG = _mm_shuffle_epi32( _MSG, 0x0e ); /* Move words 2 & 3 to positions 0 & 1 */ \ + _ABEF = _mm_sha256rnds2_epu32( _ABEF, _CDGH, _MSG ); /* 2 rounds using SHA-NI */ + +// For the SHA message schedule (i.e. to create words W_16 to W_63) we use 4 XMM states / accumulators. +// Each accumulator holds 4 words. +// +// The final result for each word will be of the form W_t = X_t + Y_t, where +// X_t = W_(t-16) + \sigma_0(W_(t-15)) and +// Y_t = W_(t- 7) + \sigma_1(W_(t- 2)) +// +// The X_t's are calculated by the _mm_sha256msg1_epu32 intrinsic. +// The \sigma_1(W_(t-2)) part of the Y_t's is calculated by the _mm_sha256msg2_epu32 intrinsic. +// +// Remarks: +// - Calculation of the first four X_t's (i.e. 16<=t<=19) can start from round 4 (since 19-15 = 4). +// - Calculation of the first four Y_t's can start from round 12 (since 19-7=12 and W_(19-7) is calculated +// in the intrinsic call). +// - Due to the W_(t-7) term, producing the Y_t's needs special shifting via the _mm_alignr_epi8 intrinsic and +// adding the correct accumulator into another variable MTEMP. +// +// For rounds 16 - 51 we execute the following macro in a loop. For all the other rounds we +// use specific code. 
+// +// The loop invariant to be satisfied at the beginning of iteration i (corresponding to rounds +// (16+4*i) to (19+4*i) ) is the following: +// _MSG_0 = ( W_(19 + 4*i), W_(18 + 4*i), W_(17 + 4*i), W_(16 + 4*i) ) +// _MSG_1 = ( X_(23 + 4*i), X_(22 + 4*i), X_(21 + 4*i), X_(20 + 4*i) ) +// _MSG_2 = ( X_(27 + 4*i), X_(26 + 4*i), X_(25 + 4*i), X_(24 + 4*i) ) +// _MSG_3 = ( W_(15 + 4*i), W_(14 + 4*i), W_(13 + 4*i), W_(12 + 4*i) ) +// +#define SHANI_MESSAGE_SCHEDULE( _MSG_0, _MSG_1, _MSG_2, _MSG_3, _MTEMP ) \ + _MTEMP = _mm_alignr_epi8( _MSG_0, _MSG_3, 4); /* _MTEMP := ( W_(16 + 4*i), W_(15 + 4*i), W_(14 + 4*i), W_(13 + 4*i) ) */ \ + _MSG_1 = _mm_add_epi32( _MSG_1, _MTEMP); /* _MSG_1 := _MSG_1 + ( W_(16 + 4*i), W_(15 + 4*i), W_(14 + 4*i), W_(13 + 4*i) ) */ \ + _MSG_1 = _mm_sha256msg2_epu32( _MSG_1, _MSG_0 ); /* _MSG_1 := ( W_(23 + 4*i), W_(22 + 4*i), W_(21 + 4*i), W_(20 + 4*i) ) */ \ + _MSG_3 = _mm_sha256msg1_epu32( _MSG_3, _MSG_0 ); /* _MSG_3 := ( X_(31+4*i), X_(30+4*i), X_(29+4*i), X_(28+4*i) ) */ +// +// After each iteration the subsequent call rotates the accumulators so that the loop +// invariant is preserved (please verify!): +// -- MSG_0 <---- MSG_1 <--- MSG_2 <--- MSG_3 <-- +// | | +// ---------------------------------------------- + +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_shani( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ) +{ + const __m128i BYTE_REVERSE_32 = _mm_set_epi8( 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 ); + + // Our chain state is in order A, B, ..., H. 
+ // First load our chaining state + __m128i DCBA = _mm_loadu_si128( (__m128i *)&(pChain->H[0]) ); // (D, C, B, A) + __m128i HGFE = _mm_loadu_si128( (__m128i *)&(pChain->H[4]) ); // (H, G, F, E) + __m128i FEBA = _mm_unpacklo_epi64( DCBA, HGFE ); // (F, E, B, A) + __m128i HGDC = _mm_unpackhi_epi64( DCBA, HGFE ); // (H, G, D, C) + __m128i ABEF = _mm_shuffle_epi32( FEBA, 0x1b ); // (A, B, E, F) + __m128i CDGH = _mm_shuffle_epi32( HGDC, 0x1b ); // (C, D, G, H) + + while( cbData >= 64 ) + { + // Save the current state for the feed-forward later + __m128i ABEF_start = ABEF; + __m128i CDGH_start = CDGH; + + // Current message and temporary state + __m128i MSG; + + // Accumulators + __m128i MSG_0; + __m128i MSG_1; + __m128i MSG_2; + __m128i MSG_3; + + // Rounds 0-3 + MSG = _mm_loadu_si128( (__m128i *)pbData ); // Reversed word - ( M_3, M_2, M_1, M_0 ) + pbData += 16; + MSG = _mm_shuffle_epi8( MSG, BYTE_REVERSE_32 ); // Reverse each word + MSG_0 = MSG; // MSG_0 := ( W_3 = M3, W_2 = M_2, W_1 = M_1, W_0 = M_0 ) + + SHANI_UPDATE_STATE( 0, MSG, ABEF, CDGH ); + + // Rounds 4-7 + MSG = _mm_loadu_si128( (__m128i *)pbData ); // Reversed word - ( M_7, M_6, M_5, M_4 ) + pbData += 16; + MSG = _mm_shuffle_epi8( MSG, BYTE_REVERSE_32 ); // Reverse each word + MSG_1 = MSG; // MSG_1 := ( W_7 = M_7, W_6 = M_6, W_5 = M_5, W_4 = M_4 ) + + SHANI_UPDATE_STATE( 4, MSG, ABEF, CDGH ); + + MSG_0 = _mm_sha256msg1_epu32( MSG_0, MSG_1 ); // MSG_0 := ( X_19, X_18, X_17, X_16 ) = + // ( W_3 + \sigma_0(W_4), ..., W_0 + \sigma_0(W_1) ) + + // Rounds 8-11 + MSG = _mm_loadu_si128( (__m128i *)pbData ); // Reversed word - ( M_11, M_10, M_9, M_8 ) + pbData += 16; + MSG = _mm_shuffle_epi8( MSG, BYTE_REVERSE_32 ); // Reverse each word + MSG_2 = MSG; // MSG_2 := ( W_11 = M_11, W_10 = M_10, W_9 = M_9, W_8 = M_8 ) + + SHANI_UPDATE_STATE( 8, MSG, ABEF, CDGH ); + + MSG_1 = _mm_sha256msg1_epu32( MSG_1, MSG_2 ); // MSG_1 := ( X_23, X_22, X_21, X_20 ) + + // Rounds 12-15 + MSG = _mm_loadu_si128( (__m128i *)pbData ); // 
Reversed word - ( M_15, M_14, M_13, M_12 ) + pbData += 16; + MSG = _mm_shuffle_epi8( MSG, BYTE_REVERSE_32 ); // Reverse each word + MSG_3 = MSG; // MSG_3 := ( W_15 = M_15, W_14 = M_14, W_13 = M_13, W_12 = M_12 ) + + SHANI_UPDATE_STATE( 12, MSG, ABEF, CDGH ); + + MSG = _mm_alignr_epi8( MSG_3, MSG_2, 4); // MSG := ( W_12, W_11, W_10, W_9 ) + MSG_0 = _mm_add_epi32( MSG_0, MSG); // MSG_0 := MSG_0 + ( W_12, W_11, W_10, W_9 ) + MSG_0 = _mm_sha256msg2_epu32( MSG_0, MSG_3 ); // MSG_0 := ( W_19, W_18, W_17, W_16 ) = + // ( X_19 + W_12 + \sigma_1(W_17)], ..., X_16 + W_9 + \sigma_1(W_14)] ) + + MSG_2 = _mm_sha256msg1_epu32( MSG_2, MSG_3 ); // MSG_2 := ( X_27, X_26, X_25, X_24 ) + + + // Rounds 16 - 19 + MSG = MSG_0; + SHANI_UPDATE_STATE( 16, MSG, ABEF, CDGH ); + SHANI_MESSAGE_SCHEDULE( MSG_0, MSG_1, MSG_2, MSG_3, MSG ); + + // Rounds 20 - 23 + MSG = MSG_1; + SHANI_UPDATE_STATE( 20, MSG, ABEF, CDGH ); + SHANI_MESSAGE_SCHEDULE( MSG_1, MSG_2, MSG_3, MSG_0, MSG ); + + // Rounds 24 - 27 + MSG = MSG_2; + SHANI_UPDATE_STATE( 24, MSG, ABEF, CDGH ); + SHANI_MESSAGE_SCHEDULE( MSG_2, MSG_3, MSG_0, MSG_1, MSG ); + + // Rounds 28 - 31 + MSG = MSG_3; + SHANI_UPDATE_STATE( 28, MSG, ABEF, CDGH ); + SHANI_MESSAGE_SCHEDULE( MSG_3, MSG_0, MSG_1, MSG_2, MSG ); + + // Rounds 32 - 35 + MSG = MSG_0; + SHANI_UPDATE_STATE( 32, MSG, ABEF, CDGH ); + SHANI_MESSAGE_SCHEDULE( MSG_0, MSG_1, MSG_2, MSG_3, MSG ); + + // Rounds 36 - 39 + MSG = MSG_1; + SHANI_UPDATE_STATE( 36, MSG, ABEF, CDGH ); + SHANI_MESSAGE_SCHEDULE( MSG_1, MSG_2, MSG_3, MSG_0, MSG ); + + // Rounds 40 - 43 + MSG = MSG_2; + SHANI_UPDATE_STATE( 40, MSG, ABEF, CDGH ); + SHANI_MESSAGE_SCHEDULE( MSG_2, MSG_3, MSG_0, MSG_1, MSG ); + + // Rounds 44 - 47 + MSG = MSG_3; + SHANI_UPDATE_STATE( 44, MSG, ABEF, CDGH ); + SHANI_MESSAGE_SCHEDULE( MSG_3, MSG_0, MSG_1, MSG_2, MSG ); + + // Rounds 48 - 51 + MSG = MSG_0; + SHANI_UPDATE_STATE( 48, MSG, ABEF, CDGH ); + SHANI_MESSAGE_SCHEDULE( MSG_0, MSG_1, MSG_2, MSG_3, MSG ); + + // Rounds 52 - 55 + MSG = 
MSG_1; // ( W_55, W_54, W_53, W_52 ) + SHANI_UPDATE_STATE( 52, MSG, ABEF, CDGH ); + + MSG = _mm_alignr_epi8( MSG_1, MSG_0, 4); // MSG := ( W_52, W_51, W_50, W_49 ) + MSG_2 = _mm_add_epi32( MSG_2, MSG); // MSG_2 := MSG_2 + ( W_52, W_51, W_50, W_49 ) + MSG_2 = _mm_sha256msg2_epu32( MSG_2, MSG_1 ); // Calculate ( W_59, W_58, W_57, W_56 ) + + // Rounds 56 - 59 + MSG = MSG_2; // ( W_59, W_58, W_57, W_56 ) + SHANI_UPDATE_STATE( 56, MSG, ABEF, CDGH ); + + MSG = _mm_alignr_epi8( MSG_2, MSG_1, 4); // MSG := ( W_56, W_55, W_54, W_53 ) + MSG_3 = _mm_add_epi32( MSG_3, MSG); // MSG_3 := MSG_3 + ( W_56, W_55, W_54, W_53 ) + MSG_3 = _mm_sha256msg2_epu32( MSG_3, MSG_2 ); // Calculate ( W_63, W_62, W_61, W_60 ) + + // Rounds 60 - 63 + SHANI_UPDATE_STATE( 60, MSG_3, ABEF, CDGH ); + + // Add the feed-forward + ABEF = _mm_add_epi32( ABEF, ABEF_start ); + CDGH = _mm_add_epi32( CDGH, CDGH_start ); + + cbData -= 64; + } + + // Unpack the state registers and store them in the state + FEBA = _mm_shuffle_epi32( ABEF, 0x1b ); + HGDC = _mm_shuffle_epi32( CDGH, 0x1b ); + DCBA = _mm_unpacklo_epi64( FEBA, HGDC ); // (D, C, B, A) + HGFE = _mm_unpackhi_epi64( FEBA, HGDC ); // (H, G, F, E) + _mm_storeu_si128 ( (__m128i *)&(pChain->H[0]), DCBA); // (D, C, B, A) + _mm_storeu_si128 ( (__m128i *)&(pChain->H[4]), HGFE); // (H, G, F, E) + + *pcbRemaining = cbData; +} + +#undef SHANI_UPDATE_STATE +#undef SHANI_MESSAGE_SCHEDULE + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#if SYMCRYPT_CPU_ARM64 +/* +ARM64 has special SHA-256 instructions + +SHA256H and SHA256H2 implement 4 rounds of SHA-256. The inputs are two registers containing the 256-bit state, +and one register containing 128 bits of expanded message plus the round constants. +These instructions perform the same computation, but SHA256H returns the first half of the 256-bit result, +and SHA256H2 returns the second half of the 256-bit result. 
+ +SHA256H( ABCD, EFGH, W ) +Where the least significant word of the ABCD vector is A. The W vector contains W_i + K_i for the four rounds being computed. + +SHA256SU0 is the message schedule update function. +It takes 2 inputs and produces 1 output. +We describe the vectors for i=0,1,2,3 +Inputs: [W_{t-16+i}], [W_{t-12+i}] +Output: [Sigma0(W_{t-15+i}) + W_{t-16+i}] + +SHA256SU1 is the second message schedule update function +Takes 3 inputs and produces 1 output +Input 1: Output of SHA256SU0: [Sigma0(W_{t-15+i}) + W_{t-16+i}] +Input 2: [W_{t-8+i}] +Input 3: [W_{t-4+i}] + +*/ + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("sha2"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("sha2") +#endif + +#define vldq(_p) (*(__n128 *)(_p)) +#define vstq(_p, _v) (*(__n128 *)(_p) = (_v) ) + +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks_instr( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE * pChain, + _In_reads_( cbData ) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ) +{ + // + // Armv8 has 32 Neon registers. We can use a lot of variables. 
+ // 16 for the constants, 4 for the message, 2 for the current state, 2 for the starting state, + // total = 24 which leaves enough for some temp values + // + __n128 ABCD, ABCDstart; + __n128 EFGH, EFGHstart; + __n128 W0, W1, W2, W3; + __n128 K0, K1, K2, K3, K4, K5, K6, K7, K8, K9, K10, K11, K12, K13, K14, K15; + + __n128 Wr; + __n128 t; + + ABCD = ABCDstart = vldq( &pChain->H[0] ); + EFGH = EFGHstart = vldq( &pChain->H[4] ); + + K0 = vldq( &SymCryptSha256K[ 4 * 0 ] ); + K1 = vldq( &SymCryptSha256K[ 4 * 1 ] ); + K2 = vldq( &SymCryptSha256K[ 4 * 2 ] ); + K3 = vldq( &SymCryptSha256K[ 4 * 3 ] ); + K4 = vldq( &SymCryptSha256K[ 4 * 4 ] ); + K5 = vldq( &SymCryptSha256K[ 4 * 5 ] ); + K6 = vldq( &SymCryptSha256K[ 4 * 6 ] ); + K7 = vldq( &SymCryptSha256K[ 4 * 7 ] ); + K8 = vldq( &SymCryptSha256K[ 4 * 8 ] ); + K9 = vldq( &SymCryptSha256K[ 4 * 9 ] ); + K10 = vldq( &SymCryptSha256K[ 4 * 10 ] ); + K11 = vldq( &SymCryptSha256K[ 4 * 11 ] ); + K12 = vldq( &SymCryptSha256K[ 4 * 12 ] ); + K13 = vldq( &SymCryptSha256K[ 4 * 13 ] ); + K14 = vldq( &SymCryptSha256K[ 4 * 14 ] ); + K15 = vldq( &SymCryptSha256K[ 4 * 15 ] ); + + while( cbData >= 64 ) + { + W0 = vrev32q_u8( vldq( &pbData[ 0] ) ); + W1 = vrev32q_u8( vldq( &pbData[16] ) ); + W2 = vrev32q_u8( vldq( &pbData[32] ) ); + W3 = vrev32q_u8( vldq( &pbData[48] ) ); + + // + // The sha256h/sha256h2 instructions overwrite one of the two state input registers. + // This implies we have to have a copy made of one of the input states. 
+ // +#define ROUNDOP {\ + t = ABCD;\ + ABCD = vsha256hq_u32 ( ABCD, EFGH, Wr );\ + EFGH = vsha256h2q_u32( EFGH, t, Wr );\ + } + + Wr = vaddq_u32( W0, K0 ); + ROUNDOP; + Wr = vaddq_u32( W1, K1 ); + ROUNDOP; + Wr = vaddq_u32( W2, K2 ); + ROUNDOP; + Wr = vaddq_u32( W3, K3 ); + ROUNDOP; + + t = vsha256su0q_u32( W0, W1 ); + W0 = vsha256su1q_u32( t, W2, W3 ); + Wr = vaddq_u32( W0, K4 ); + ROUNDOP; + + t = vsha256su0q_u32( W1, W2 ); + W1 = vsha256su1q_u32( t, W3, W0 ); + Wr = vaddq_u32( W1, K5 ); + ROUNDOP; + + t = vsha256su0q_u32( W2, W3 ); + W2 = vsha256su1q_u32( t, W0, W1 ); + Wr = vaddq_u32( W2, K6 ); + ROUNDOP; + + t = vsha256su0q_u32( W3, W0 ); + W3 = vsha256su1q_u32( t, W1, W2 ); + Wr = vaddq_u32( W3, K7 ); + ROUNDOP; + + + t = vsha256su0q_u32( W0, W1 ); + W0 = vsha256su1q_u32( t, W2, W3 ); + Wr = vaddq_u32( W0, K8 ); + ROUNDOP; + + t = vsha256su0q_u32( W1, W2 ); + W1 = vsha256su1q_u32( t, W3, W0 ); + Wr = vaddq_u32( W1, K9 ); + ROUNDOP; + + t = vsha256su0q_u32( W2, W3 ); + W2 = vsha256su1q_u32( t, W0, W1 ); + Wr = vaddq_u32( W2, K10 ); + ROUNDOP; + + t = vsha256su0q_u32( W3, W0 ); + W3 = vsha256su1q_u32( t, W1, W2 ); + Wr = vaddq_u32( W3, K11 ); + ROUNDOP; + + + t = vsha256su0q_u32( W0, W1 ); + W0 = vsha256su1q_u32( t, W2, W3 ); + Wr = vaddq_u32( W0, K12 ); + ROUNDOP; + + t = vsha256su0q_u32( W1, W2 ); + W1 = vsha256su1q_u32( t, W3, W0 ); + Wr = vaddq_u32( W1, K13 ); + ROUNDOP; + + t = vsha256su0q_u32( W2, W3 ); + W2 = vsha256su1q_u32( t, W0, W1 ); + Wr = vaddq_u32( W2, K14 ); + ROUNDOP; + + t = vsha256su0q_u32( W3, W0 ); + W3 = vsha256su1q_u32( t, W1, W2 ); + Wr = vaddq_u32( W3, K15 ); + ROUNDOP; + + ABCDstart = ABCD = vaddq_u32( ABCDstart, ABCD ); + EFGHstart = EFGH = vaddq_u32( EFGHstart, EFGH ); + + pbData += 64; + cbData -= 64; +#undef ROUNDOP + + } + + *pcbRemaining = cbData; + vstq( &pChain->H[0], ABCD ); + vstq( &pChain->H[4], EFGH ); + + // + // All our local variables should be in registers, so no way to wipe them. 
+ // +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif + + + +// +// Easy switch between different implementations +// +//FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptSha256AppendBlocks( + _Inout_ SYMCRYPT_SHA256_CHAINING_STATE* pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T* pcbRemaining) +{ +#if SYMCRYPT_CPU_AMD64 + + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if (SYMCRYPT_CPU_FEATURES_PRESENT(SYMCRYPT_CPU_FEATURES_FOR_SHANI_CODE) && + SymCryptSaveXmm(&SaveData) == SYMCRYPT_NO_ERROR) + { + SymCryptSha256AppendBlocks_shani(pChain, pbData, cbData, pcbRemaining); + + SymCryptRestoreXmm(&SaveData); + } + // Temporarily disabling use of Ymm in SHA2 + // else if (SYMCRYPT_CPU_FEATURES_PRESENT(SYMCRYPT_CPU_FEATURE_AVX2 | SYMCRYPT_CPU_FEATURE_BMI2) && + // SymCryptSaveYmm(&SaveData) == SYMCRYPT_NO_ERROR) + // { + // //SymCryptSha256AppendBlocks_ul1(pChain, pbData, cbData, pcbRemaining); + // //SymCryptSha256AppendBlocks_ymm_8blocks(pChain, pbData, cbData, pcbRemaining); + // SymCryptSha256AppendBlocks_ymm_avx2_asm(pChain, pbData, cbData, pcbRemaining); + + // SymCryptRestoreYmm(&SaveData); + // } + else if (SYMCRYPT_CPU_FEATURES_PRESENT(SYMCRYPT_CPU_FEATURE_SSSE3 | SYMCRYPT_CPU_FEATURE_BMI2) && + SymCryptSaveXmm(&SaveData) == SYMCRYPT_NO_ERROR) + { + //SymCryptSha256AppendBlocks_xmm_4blocks(pChain, pbData, cbData, pcbRemaining); + SymCryptSha256AppendBlocks_xmm_ssse3_asm(pChain, pbData, cbData, pcbRemaining); + + SymCryptRestoreXmm(&SaveData); + } + else + { + SymCryptSha256AppendBlocks_ul1( pChain, pbData, cbData, pcbRemaining ); + //SymCryptSha256AppendBlocks_ul2(pChain, pbData, cbData, pcbRemaining); + } +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_SHANI_CODE | SYMCRYPT_CPU_FEATURE_SAVEXMM_NOFAIL ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptSha256AppendBlocks_shani( pChain, pbData, 
cbData, pcbRemaining ); + SymCryptRestoreXmm( &SaveData ); + } + else if (SYMCRYPT_CPU_FEATURES_PRESENT(SYMCRYPT_CPU_FEATURE_SSSE3 | SYMCRYPT_CPU_FEATURE_BMI2) + && SymCryptSaveXmm(&SaveData) == SYMCRYPT_NO_ERROR) + { + SymCryptSha256AppendBlocks_xmm_4blocks(pChain, pbData, cbData, pcbRemaining); + SymCryptRestoreXmm(&SaveData); + } + else { + SymCryptSha256AppendBlocks_ul1( pChain, pbData, cbData, pcbRemaining ); + } +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_SHA256 ) ) + { + SymCryptSha256AppendBlocks_instr( pChain, pbData, cbData, pcbRemaining ); + } else { + SymCryptSha256AppendBlocks_ul1( pChain, pbData, cbData, pcbRemaining ); + } +#else + SymCryptSha256AppendBlocks_ul1( pChain, pbData, cbData, pcbRemaining ); +#endif + + //SymCryptSha256AppendBlocks_ul2( pChain, pbData, cbData, pcbRemaining ); + //SymCryptSha256AppendBlocks_xmm1( pChain, pbData, cbData, pcbRemaining ); !!! Needs Save/restore logic + //SymCryptSha256AppendBlocks_xmm2( pChain, pbData, cbData, pcbRemaining ); +} diff --git a/libs/symcrypt/lib/sha256Par-ymm.c b/libs/symcrypt/lib/sha256Par-ymm.c new file mode 100644 index 00000000000..9ae1b2b9dd4 --- /dev/null +++ b/libs/symcrypt/lib/sha256Par-ymm.c @@ -0,0 +1,269 @@ +// +// Sha256Par-ymm.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// +// All YMM code for SHA256 Parallel operations +// Requires compiler support for avx2 +// + +#include "precomp.h" + +extern SYMCRYPT_ALIGN_AT( 256 ) const UINT32 SymCryptSha256K[64]; + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("avx2") +#endif + +// +// Code that uses the YMM registers. 
+// The macros below apply the SHA-256 Maj, Ch, Sigma0/Sigma1 (CSIGMA) and sigma0/sigma1 (LSIGMA) functions to each 32-bit element of a YMM vector. + +#define MAJYMM( x, y, z ) _mm256_or_si256( _mm256_and_si256( _mm256_or_si256( z, y ), x ), _mm256_and_si256( z, y )) +#define CHYMM( x, y, z ) _mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( z, y ), x ), z ) + +#define CSIGMA0YMM( x ) \ + _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \ + _mm256_slli_epi32(x,30) , _mm256_srli_epi32(x, 2) ),\ + _mm256_slli_epi32(x,19) ), _mm256_srli_epi32(x, 13) ),\ + _mm256_slli_epi32(x,10) ), _mm256_srli_epi32(x, 22) ) +#define CSIGMA1YMM( x ) \ + _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \ + _mm256_slli_epi32(x,26) , _mm256_srli_epi32(x, 6) ),\ + _mm256_slli_epi32(x,21) ), _mm256_srli_epi32(x, 11) ),\ + _mm256_slli_epi32(x,7) ), _mm256_srli_epi32(x, 25) ) +#define LSIGMA0YMM( x ) \ + _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \ + _mm256_slli_epi32(x,25) , _mm256_srli_epi32(x, 7) ),\ + _mm256_slli_epi32(x,14) ), _mm256_srli_epi32(x, 18) ),\ + _mm256_srli_epi32(x, 3) ) +#define LSIGMA1YMM( x ) \ + _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \ + _mm256_slli_epi32(x,15) , _mm256_srli_epi32(x, 17) ),\ + _mm256_slli_epi32(x,13) ), _mm256_srli_epi32(x, 19) ),\ + _mm256_srli_epi32(x,10) ) + +// +// Transpose macro, convert S0..S7 into R0..R7; R0 is lane 0, R7 is lane 7. 
+// +// +// S0 = S00, S01, S02, S03, S04, S05, S06, S07 +// S1 = S10, S11, S12, S13, S14, S15, S16, S17 +// S2 = S20, S21, S22, S23, S24, S25, S26, S27 +// S3 = S30, S31, S32, S33, S34, S35, S36, S37 +// S4 = S40, S41, S42, S43, S44, S45, S46, S47 +// S5 = S50, S51, S52, S53, S54, S55, S56, S57 +// S6 = S60, S61, S62, S63, S64, S65, S66, S67 +// S7 = S70, S71, S72, S73, S74, S75, S76, S77 +// +// T0 = S00, S10, S01, S11, S04, S14, S05, S15 +// T1 = S02, S12, S03, S13, S06, S16, S07, S17 +// T2 = S20, S30, S21, S31, S24, S34, S25, S35 +// T3 = S22, S32, S23, S33, S26, S36, S27, S37 +// T4 = S40, S50, S41, S51, S44, S54, S45, S55 +// T5 = S42, S52, S43, S53, S46, S56, S47, S57 +// T6 = S60, S70, S61, S71, S64, S74, S65, S75 +// T7 = S62, S72, S63, S73, S66, S76, S67, S77 +// +// U0 = S00, S10, S20, S30, S04, S14, S24, S34 +// U1 = S01, S11, S21, S31, S05, S15, S25, S35 +// U2 = S02, S12, S22, S32, S06, S16, S26, S36 +// U3 = S03, S13, S23, S33, S07, S17, S27, S37 +// U4 = S40, S50, S60, S70, S44, S54, S64, S74 +// U5 = S41, S51, S61, S71, S45, S55, S65, S75 +// U6 = S42, S52, S62, S72, S46, S56, S66, S76 +// U7 = S43, S53, S63, S73, S47, S57, S67, S77 +// +// R0 = s00, s10, s20, s30, s40, s50, s60, s70 +// R1 = s01, s11, s21, s31, s41, s51, s61, s71 +// R2 = s02, s12, s22, s32, s42, s52, s62, s72 +// R3 = s03, s13, s23, s33, s43, s53, s63, s73 +// R4 = s04, s14, s24, s34, s44, s54, s64, s74 +// R5 = s05, s15, s25, s35, s45, s55, s65, s75 +// R6 = s06, s16, s26, s36, s46, s56, s66, s76 +// R7 = s07, s17, s27, s37, s47, s57, s67, s77 +// +#define YMM_TRANSPOSE_32( _R0, _R1, _R2, _R3, _R4, _R5, _R6, _R7, _S0, _S1, _S2, _S3, _S4, _S5, _S6, _S7 ) \ + {\ + __m256i _T0, _T1, _T2, _T3, _T4, _T5, _T6, _T7;\ + __m256i _U0, _U1, _U2, _U3, _U4, _U5, _U6, _U7;\ + _T0 = _mm256_unpacklo_epi32( _S0, _S1 ); _T1 = _mm256_unpackhi_epi32( _S0, _S1 );\ + _T2 = _mm256_unpacklo_epi32( _S2, _S3 ); _T3 = _mm256_unpackhi_epi32( _S2, _S3 );\ + _T4 = _mm256_unpacklo_epi32( _S4, _S5 ); _T5 = 
_mm256_unpackhi_epi32( _S4, _S5 );\ + _T6 = _mm256_unpacklo_epi32( _S6, _S7 ); _T7 = _mm256_unpackhi_epi32( _S6, _S7 );\ + \ + _U0 = _mm256_unpacklo_epi64( _T0, _T2 ); _U1 = _mm256_unpackhi_epi64( _T0, _T2 );\ + _U2 = _mm256_unpacklo_epi64( _T1, _T3 ); _U3 = _mm256_unpackhi_epi64( _T1, _T3 );\ + _U4 = _mm256_unpacklo_epi64( _T4, _T6 ); _U5 = _mm256_unpackhi_epi64( _T4, _T6 );\ + _U6 = _mm256_unpacklo_epi64( _T5, _T7 ); _U7 = _mm256_unpackhi_epi64( _T5, _T7 );\ + \ + _R0 = _mm256_permute2x128_si256( _U0, _U4, 0x20 ); _R1 = _mm256_permute2x128_si256( _U1, _U5, 0x20);\ + _R2 = _mm256_permute2x128_si256( _U2, _U6, 0x20 ); _R3 = _mm256_permute2x128_si256( _U3, _U7, 0x20);\ + _R4 = _mm256_permute2x128_si256( _U0, _U4, 0x31 ); _R5 = _mm256_permute2x128_si256( _U1, _U5, 0x31);\ + _R6 = _mm256_permute2x128_si256( _U2, _U6, 0x31 ); _R7 = _mm256_permute2x128_si256( _U3, _U7, 0x31);\ + } + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256AppendBlocks_ymm( + _Inout_updates_( 8 ) PSYMCRYPT_SHA256_CHAINING_STATE * pChain, + _Inout_updates_( 8 ) PCBYTE * ppByte, + SIZE_T nBytes, + _Out_writes_( PAR_SCRATCH_ELEMENTS_256 * 32 ) PBYTE pScratch ) +{ + // + // Implementation that uses 8 lanes in the YMM registers + // + __m256i * buf = (__m256i *)pScratch; + __m256i * W = &buf[4 + 8]; + __m256i * ha = &buf[4]; // initial state words, in order h, g, ..., b, a + __m256i A, B, C, D, T; + __m256i T0, T1, T2, T3, T4, T5, T6, T7; + __m256i BYTE_REVERSE_32; + int r; + + _mm256_zeroupper(); + BYTE_REVERSE_32 = _mm256_set_epi8( 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 ); + + // + // The chaining state can be unaligned on x86, so we use unaligned loads + // + + T0 = _mm256_loadu_si256( (__m256i *)&pChain[0]->H[0] ); + T1 = _mm256_loadu_si256( (__m256i *)&pChain[1]->H[0] ); + T2 = _mm256_loadu_si256( (__m256i *)&pChain[2]->H[0] ); + T3 = _mm256_loadu_si256( (__m256i *)&pChain[3]->H[0] ); + T4 = _mm256_loadu_si256( (__m256i 
*)&pChain[4]->H[0] ); + T5 = _mm256_loadu_si256( (__m256i *)&pChain[5]->H[0] ); + T6 = _mm256_loadu_si256( (__m256i *)&pChain[6]->H[0] ); + T7 = _mm256_loadu_si256( (__m256i *)&pChain[7]->H[0] ); + + YMM_TRANSPOSE_32( ha[7], ha[6], ha[5], ha[4], ha[3], ha[2], ha[1], ha[0], T0, T1, T2, T3, T4, T5, T6, T7 ); + + buf[0] = ha[4]; + buf[1] = ha[5]; + buf[2] = ha[6]; + buf[3] = ha[7]; + + while( nBytes >= 64 ) + { + + // + // Capture the input into W[0..15] + // + for( r=0; r<16; r += 8 ) + { + T0 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[0] ), BYTE_REVERSE_32 ); ppByte[0] += 32; + T1 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[1] ), BYTE_REVERSE_32 ); ppByte[1] += 32; + T2 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[2] ), BYTE_REVERSE_32 ); ppByte[2] += 32; + T3 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[3] ), BYTE_REVERSE_32 ); ppByte[3] += 32; + T4 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[4] ), BYTE_REVERSE_32 ); ppByte[4] += 32; + T5 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[5] ), BYTE_REVERSE_32 ); ppByte[5] += 32; + T6 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[6] ), BYTE_REVERSE_32 ); ppByte[6] += 32; + T7 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[7] ), BYTE_REVERSE_32 ); ppByte[7] += 32; + + YMM_TRANSPOSE_32( W[r], W[r+1], W[r+2], W[r+3], W[r+4], W[r+5], W[r+6], W[r+7], T0, T1, T2, T3, T4, T5, T6, T7 ); + } + + // + // Expand the message + // + A = W[15]; + B = W[14]; + D = W[0]; + for( r=16; r<64; r+= 2 ) + { + // Loop invariant: A=W[r-1], B = W[r-2], D = W[r-16] + + // + // Macro for one word of message expansion. 
+ // Invariant: + // on entry: a = W[r-1], b = W[r-2], d = W[r-16] + // on exit: W[r] computed, a = W[r-1], b = W[r], c = W[r-15] + // + #define EXPAND( a, b, c, d, r ) \ + c = W[r-15]; \ + b = _mm256_add_epi32( _mm256_add_epi32( _mm256_add_epi32( d, LSIGMA1YMM( b ) ), W[r-7] ), LSIGMA0YMM( c ) ); \ + W[r] = b; \ + + EXPAND( A, B, C, D, r ); + EXPAND( B, A, D, C, (r+1)); + + #undef EXPAND + } + + A = ha[7]; + B = ha[6]; + C = ha[5]; + D = ha[4]; + + for( r=0; r<64; r += 4 ) + { + // + // Loop invariant: + // A, B, C, and D are the a,b,c,d values of the current state. + // W[r] is the next expanded message word to be processed. + // W[r-8 .. r-5] contain the current state words h, g, f, e. + // + + // + // Macro to compute one round + // + #define DO_ROUND( a, b, c, d, t, r ) \ + t = W[r]; \ + t = _mm256_add_epi32( t, CSIGMA1YMM( W[r-5] ) ); \ + t = _mm256_add_epi32( t, W[r-8] ); \ + t = _mm256_add_epi32( t, CHYMM( W[r-5], W[r-6], W[r-7] ) ); \ + t = _mm256_add_epi32( t, _mm256_set1_epi32( SymCryptSha256K[r] )); \ + W[r-4] = _mm256_add_epi32( t, d ); \ + d = _mm256_add_epi32( t, CSIGMA0YMM( a ) ); \ + d = _mm256_add_epi32( d, MAJYMM( c, b, a ) ); + + DO_ROUND( A, B, C, D, T, r ); + DO_ROUND( D, A, B, C, T, (r+1) ); + DO_ROUND( C, D, A, B, T, (r+2) ); + DO_ROUND( B, C, D, A, T, (r+3) ); + #undef DO_ROUND + } + + buf[3] = ha[7] = _mm256_add_epi32( buf[3], A ); + buf[2] = ha[6] = _mm256_add_epi32( buf[2], B ); + buf[1] = ha[5] = _mm256_add_epi32( buf[1], C ); + buf[0] = ha[4] = _mm256_add_epi32( buf[0], D ); + ha[3] = _mm256_add_epi32( ha[3], W[r-5] ); + ha[2] = _mm256_add_epi32( ha[2], W[r-6] ); + ha[1] = _mm256_add_epi32( ha[1], W[r-7] ); + ha[0] = _mm256_add_epi32( ha[0], W[r-8] ); + + nBytes -= 64; + } + + // + // Copy the chaining state back into the hash structure + // + YMM_TRANSPOSE_32( T0, T1, T2, T3, T4, T5, T6, T7, ha[7], ha[6], ha[5], ha[4], ha[3], ha[2], ha[1], ha[0] ); + _mm256_storeu_si256( (__m256i *)&pChain[0]->H[0], T0 ); + _mm256_storeu_si256( 
(__m256i *)&pChain[1]->H[0], T1 ); + _mm256_storeu_si256( (__m256i *)&pChain[2]->H[0], T2 ); + _mm256_storeu_si256( (__m256i *)&pChain[3]->H[0], T3 ); + _mm256_storeu_si256( (__m256i *)&pChain[4]->H[0], T4 ); + _mm256_storeu_si256( (__m256i *)&pChain[5]->H[0], T5 ); + _mm256_storeu_si256( (__m256i *)&pChain[6]->H[0], T6 ); + _mm256_storeu_si256( (__m256i *)&pChain[7]->H[0], T7 ); + + _mm256_zeroupper(); + +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // CPU_X86_X64 diff --git a/libs/symcrypt/lib/sha256Par.c b/libs/symcrypt/lib/sha256Par.c new file mode 100644 index 00000000000..775913b016b --- /dev/null +++ b/libs/symcrypt/lib/sha256Par.c @@ -0,0 +1,1243 @@ +// +// Sha256Par.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This module contains the routines to implement SHA2-256 from FIPS 180-2 in parallel mode +// + +#include "precomp.h" + +extern SYMCRYPT_ALIGN_AT( 256 ) const UINT32 SymCryptSha256K[64]; + + +// +// Not all CPU architectures support parallel code. +// +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#define SUPPORT_PARALLEL 1 +#define MIN_PARALLEL 2 +#define MAX_PARALLEL 8 + +#elif SYMCRYPT_CPU_ARM + +#define SUPPORT_PARALLEL 1 +#define MIN_PARALLEL 3 +#define MAX_PARALLEL 4 + +#else + +#define SUPPORT_PARALLEL 0 + +#endif + + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256AppendBytes_serial( + _Inout_updates_( nPar ) PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pWork, + _In_range_(1, MAX_PARALLEL) SIZE_T nPar, + SIZE_T nBytes ); + +// +// Currently these are the generic implementations in terms of the single hash code. 
+// + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256Init( + _Out_writes_( nStates ) PSYMCRYPT_SHA256_STATE pStates, + SIZE_T nStates ) +{ + SIZE_T i; + + for( i=0; i<nStates; i++ ) + { + SymCryptSha256Init( &pStates[i] ); + } +} + +#if !SUPPORT_PARALLEL +// +// No parallel support on this CPU +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelSha256Process( + _Inout_updates_( nStates ) PSYMCRYPT_SHA256_STATE pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SymCryptParallelHashProcess_serial( SymCryptParallelSha256Algorithm, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch ); +} +#endif + + +#if SUPPORT_PARALLEL + + +// +// This function looks at a state and decides what to do. +// If it returns FALSE, then this state is done and no further processing is required. +// If it returns TRUE, the pbData/cbData have to be processed in parallel. +// This function is called again on the same state after the pbData/cbData have been processed. +// +// Internally, it keeps track of the next step to be taken for this state. +// the processingState keeps track of the next action to take. +// + + +BOOLEAN +SYMCRYPT_CALL +SymCryptParallelSha256Result1( + _In_ PCSYMCRYPT_PARALLEL_HASH pParHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState, + _Inout_ PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pScratch, + _Out_ BOOLEAN *pRes) +{ + UINT32 bytesInBuffer = pState->bytesInBuffer; + + UNREFERENCED_PARAMETER( pParHash ); + // + // Function is called when a Result is requested from a parallel hash state. + // Do the first step of the padding. 
+ // + pState->buffer[bytesInBuffer++] = 0x80; + SymCryptWipe( &pState->buffer[bytesInBuffer], SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - bytesInBuffer ); + + pScratch->pbData = &pState->buffer[0]; + pScratch->cbData = SYMCRYPT_SHA256_INPUT_BLOCK_SIZE; + + if( bytesInBuffer > SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 8 ) + { + // We need 2 blocks for the padding + pScratch->processingState = STATE_RESULT2; + } else { + SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 8], pState->dataLengthL * 8 ); + pScratch->processingState = STATE_RESULT_DONE; + } + + *pRes = TRUE; // return value from the SetWork function + return TRUE; // Return from the SetWork function +} + + +BOOLEAN +SYMCRYPT_CALL +SymCryptParallelSha256Result2( + _In_ PCSYMCRYPT_PARALLEL_HASH pParHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState, + _Inout_ PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pScratch, + _Out_ BOOLEAN *pRes) +{ + UNREFERENCED_PARAMETER( pParHash ); + // + // Called for the 2nd block of a long padding + // + SymCryptWipe( &pState->buffer[0], SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ); + SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 8], pState->dataLengthL * 8 ); + pScratch->pbData = &pState->buffer[0]; + pScratch->cbData = SYMCRYPT_SHA256_INPUT_BLOCK_SIZE; + pScratch->processingState = STATE_RESULT_DONE; + *pRes = TRUE; + return TRUE; +} + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256ResultDone( + _In_ PCSYMCRYPT_PARALLEL_HASH pParHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState, + _In_ PCSYMRYPT_PARALLEL_HASH_OPERATION pOp) +{ + PSYMCRYPT_SHA256_STATE pSha256State = (PSYMCRYPT_SHA256_STATE) pState; + + UNREFERENCED_PARAMETER( pParHash ); + + SYMCRYPT_ASSERT( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_RESULT ); + SYMCRYPT_ASSERT( pOp->cbBuffer == SYMCRYPT_SHA256_RESULT_SIZE ); + + SymCryptUint32ToMsbFirst( &pSha256State->chain.H[0], pOp->pbBuffer, 8 ); + SymCryptWipeKnownSize( pSha256State, sizeof( *pSha256State )); + SymCryptSha256Init( 
pSha256State ); +} + + +#if 0 + +BOOL +SYMCRYPT_CALL +SymCryptParallelSha256SetNextWork( PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pScratch ) +{ + PSYMCRYPT_SHA256_STATE pState; + PCSYMRYPT_PARALLEL_HASH_OPERATION pOp; + UINT32 bytesInBuffer; + UINT32 todo; + + // Retrieve the state we will operate on. + pState = (PSYMCRYPT_SHA256_STATE) pScratch->hashState; + + // + // This is a state machine where some states have to iterate + // The loop allows them to use 'continue' for that. + // +#pragma warning( suppress: 4127 ) // conditional expression is constant + while( TRUE ) + { + // + // At this point, the processing state, pbData/cbData, and next pointer define what needs to be done. + // STATE_NEXT: cbData == 0 and we have to process the remaining operations. + // STATE_DATA_START: We are working on the next operation; the first BytesAlreadyProcessed have been hashed, + // and the hash state has an empty buffer. + // STATE_DATA_END: We are working on the next operation (an append), and pbData/cbData have whatever partial block remains + // after all the whole blocks have been processed. + // STATE_PAD2: We are working on the next operation (a result), and have processed the first half of a 2-block padding. + // STATE_RESULT: We are working on the next operation (a result), and have processed all the padding. + // + // The pState->dataLength is updated whenever we copy bytes from the append into the state's buffer, or when + // we return TRUE and process bulk data. 
+ // + pOp = pScratch->next; + switch( pScratch->processingState ) + { + case STATE_NEXT: + + if( pOp == NULL ) + { + return FALSE; + } + + bytesInBuffer = pState->bytesInBuffer; + + // SYMCRYPT_ASSERT( pOp->cbBuffer < ((SIZE_T)-1)/2 ); // used during testing + + if( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_APPEND ) + { + pState->dataLengthL += pOp->cbBuffer; + if( bytesInBuffer > 0 ) + { + todo = (UINT32) SYMCRYPT_MIN( SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - bytesInBuffer, pOp->cbBuffer ); + memcpy( &pState->buffer[bytesInBuffer], pOp->pbBuffer, todo ); + pState->bytesInBuffer += todo; + if( pState->bytesInBuffer == SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ) + { + // + // We filled the buffer; set it for processing. + // Remember the # bytes we did and set the next state to process the rest of the request. + // + pScratch->pbData = &pState->buffer[0]; + pScratch->cbData = sizeof( pState->buffer ); + pState->bytesInBuffer = 0; + if( todo == pOp->cbBuffer ) + { + // + // We finished the request after the pbData processing + // + pScratch->next = pOp->next; + // pScratch->processingState = STATE_NEXT // already has that value + } else { + pScratch->processingState = STATE_DATA_START; + SYMCRYPT_ASSERT( todo <= 0xff ); + pScratch->bytesAlreadyProcessed = (BYTE) todo; + } + // + // We process the buffer here, no need to update the dataLength + // + return TRUE; + } else { + // + // We finished the operation; skip to the next one. + // + pScratch->next = pOp->next; + // pScratch->processingState = STATE_NEXT // already has that value + continue; + } + } else { + // + // Buffer is empty; process the bulk data + // + pScratch->pbData = pOp->pbBuffer; + pScratch->cbData = pOp->cbBuffer; + pScratch->processingState = STATE_DATA_END; + + // + // Return TRUE if there is real data to process, and just re-run the state + // machine if we should copy the partial block to the buffer. 
+ // + if( pScratch->cbData >= SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ) + { + return TRUE; + } else { + continue; + } + } + } else { + SYMCRYPT_ASSERT( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_RESULT ); + + pState->buffer[bytesInBuffer++] = 0x80; + SymCryptWipe( &pState->buffer[bytesInBuffer], SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - bytesInBuffer ); + + pScratch->pbData = &pState->buffer[0]; + pScratch->cbData = sizeof( pState->buffer ); + + if( bytesInBuffer > SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 8 ) + { + // We need 2 blocks for the padding + pScratch->processingState = STATE_PAD2; + } else { + SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 8], pState->dataLengthL * 8 ); + pScratch->processingState = STATE_RESULT; + } + return TRUE; + } + break; + + case STATE_DATA_START: + // + // The next operation is an append, and the first few bytes of that operation have already been copied to + // the buffer and processed. We need to process the rest. + // Note that the # bytes remaining is never zero. + // + SYMCRYPT_ASSERT( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_APPEND && pOp->cbBuffer >= pScratch->bytesAlreadyProcessed ); + + pScratch->pbData = pOp->pbBuffer + pScratch->bytesAlreadyProcessed; + pScratch->cbData = pOp->cbBuffer - pScratch->bytesAlreadyProcessed; + if( pScratch->cbData >= SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ) + { + pScratch->processingState = STATE_DATA_END; + return TRUE; + } + + // + // We have less than one block left; this is exactly the same state as we have at the end of + // a normal append. Fall through to that code. + // + // FALLTHROUGH! + + case STATE_DATA_END: + // + // We finished processing the whole blocks of the pScratch->pbData, and have to process the rest. + // The current append is already popped off the work list. 
+ // + if( pScratch->cbData > 0 ) + { + SYMCRYPT_ASSERT( pScratch->cbData < SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ); + memcpy( &pState->buffer[0], pScratch->pbData, pScratch->cbData ); + pState->bytesInBuffer = (UINT32) pScratch->cbData; + } + pScratch->next = pOp->next; + pScratch->processingState = STATE_NEXT; + continue; + + case STATE_PAD2: + SymCryptWipe( &pState->buffer[0], sizeof( pState->buffer )); + SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 8], pState->dataLengthL * 8 ); + pScratch->pbData = &pState->buffer[0]; + pScratch->cbData = sizeof( pState->buffer ); + pScratch->processingState = STATE_RESULT; + return TRUE; + + case STATE_RESULT: + SYMCRYPT_ASSERT( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_RESULT ); + + SymCryptUint32ToMsbFirst( &pState->chain.H[0], pOp->pbBuffer, 8 ); + SymCryptWipeKnownSize( pState, sizeof( *pState )); + SymCryptSha256Init( pState ); + + pScratch->next = pOp->next; + pScratch->processingState = STATE_NEXT; + continue; + } + } + +#if 0 // old code, retain until we have the new one working. + ============ old code + + + SIZE_T bytesInBuffer; + SIZE_T todo; + + switch( pScratch->processingState ) + { + case START: + + if( pState->pbData != NULL ) + { + bytesInBuffer = pState->internalState.hashState.dataLength & SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 1; + + // + // There are bytes in the buffer; consume enough input to get rid of them. + // + if( bytesInBuffer > 0 ) + { + todo = SYMCRYPT_MIN( SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - bytesInBuffer, pState->cbData ); + memcpy( &pState->internalState.hashState.buffer[bytesInBuffer], pState->pbData, todo ); + pState->pbData += todo; + pState->cbData -= todo; + pState->internalState.hashState.dataLength += todo; + + // + // We don't parallelize the processing of the first block to get to the whole-block state. + // It would mean we get a 1-size block up front, and that interferes with the sorted scheduling + // we do. 
This is not a common case, and we document that this is inefficient. + // + if( (pState->internalState.hashState.dataLength & (SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 1)) == 0 ) + { + SymCryptSha256AppendBlocks( &pState->internalState.hashState.chain, + &pState->internalState.hashState.buffer[0], + SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ); + } + } + + if( pState->cbData >= SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ) + { + // + // We have more bytes to do; this means that the internal buffer is empty. + // Set the data blocks up for processing. We increment the dataLength here + // as that is part of this function, not of the processing code. + // + pState->internalState.processingState = DATA; + pState->internalState.hashState.dataLength += pState->cbData & ~(SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 1); + return TRUE; + } + + } + + // + // FALL THROUGH TO THE DATA PROCESSING + // + // There are two cases here: + // - the internal buffer is empty and we have between 1 and 63 bytes left to hash. + // - We have no bytes left to hash, but the internal buffer might contain data. + // The first case is exactly what we get after DATA processing. + // The second case is trivially handled by the same code paths as the first one. + // Instead of duplicating the code, + // we fall through to the DATA section. + // + + pState->internalState.processingState = DATA; + + case DATA: + // + // We just finished the data work, or the START code fell through here to handle the + // padding and/or pbResult + // If we just did data processing, the internal buffer is empty. + // If the internal buffer contains data, then cbData == 0. + // + + if( pState->pbData != NULL && pState->cbData > 0 ) + { + SYMCRYPT_ASSERT( pState->cbData < SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ); + memcpy( &pState->internalState.hashState.buffer[0], pState->pbData, pState->cbData ); + pState->internalState.hashState.dataLength += pState->cbData; + } + + // + // This completes the consumption of the pbData. Set it to NULL as per the API spec. 
+ // + + pState->pbData = NULL; + pState->cbData = 0; + + // + // This concludes the data processing. Now let's see if we have to compute the results + // + + if( pState->pbResult == NULL ) + { + return FALSE; + } + + bytesInBuffer = pState->internalState.hashState.dataLength & SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 1; + + // Add the first byte of padding. (Always fits as the buffer is never left full.) + pState->internalState.hashState.buffer[bytesInBuffer++] = 0x80; + SymCryptWipe( &pState->internalState.hashState.buffer[bytesInBuffer], SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - bytesInBuffer ); + + if( bytesInBuffer > SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 8 ) + { + // + // We need 2 blocks for the padding. + // + pState->internalState.processingState = PAD_INTERMEDIATE; + pState->pbData = &pState->internalState.hashState.buffer[0]; + pState->cbData = SYMCRYPT_SHA256_INPUT_BLOCK_SIZE; + + return TRUE; + } + + // + // Single padding block + // + SYMCRYPT_STORE_MSBFIRST64( &pState->internalState.hashState.buffer[SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 8], pState->internalState.hashState.dataLength * 8 ); + pState->internalState.processingState = PAD_FINAL; + pState->pbData = &pState->internalState.hashState.buffer[0]; + pState->cbData = SYMCRYPT_SHA256_INPUT_BLOCK_SIZE; + return TRUE; + + case PAD_INTERMEDIATE: + // + // Done with the intermediate padding, do the final padding. 
+ // We wipe to the end of the buffer, as it is 16-aligned and therefore often faster + // + SymCryptWipe( &pState->internalState.hashState.buffer[0], SYMCRYPT_SHA256_INPUT_BLOCK_SIZE ); + SYMCRYPT_STORE_MSBFIRST64( &pState->internalState.hashState.buffer[SYMCRYPT_SHA256_INPUT_BLOCK_SIZE - 8], pState->internalState.hashState.dataLength * 8 ); + + pState->internalState.processingState = PAD_FINAL; + pState->pbData = &pState->internalState.hashState.buffer[0]; + pState->cbData = SYMCRYPT_SHA256_INPUT_BLOCK_SIZE; + return TRUE; + + case PAD_FINAL: + SymCryptUint32ToMsbFirst( &pState->internalState.hashState.chain.H[0], pState->pbResult, 8 ); + + SymCryptWipeKnownSize( pState, sizeof( *pState ) ); + SymCryptSha256Init( &pState->internalState.hashState ); + SYMCRYPT_SET_MAGIC( &pState->internalState ); + return FALSE; + } +#endif + + SymCryptFatal( 'psha' ); + return FALSE; +} +#endif + +C_ASSERT( (SYMCRYPT_SIMD_ELEMENT_SIZE & (SYMCRYPT_SIMD_ELEMENT_SIZE - 1 )) == 0 ); // check that it is a power of 2 + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelSha256Process( + _Inout_updates_( nStates ) PSYMCRYPT_SHA256_STATE pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 maxParallel; + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + SYMCRYPT_EXTENDED_SAVE_DATA SaveState; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_AVX2 ) && SymCryptSaveYmm( &SaveState ) == SYMCRYPT_NO_ERROR ) + { + maxParallel = 8; + scError = SymCryptParallelHashProcess( SymCryptParallelSha256Algorithm, + pStates, + nStates, + pOperations, + nOperations, + pbScratch, + cbScratch, + maxParallel ); + + SymCryptRestoreYmm( &SaveState ); + } else if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSSE3 ) && SymCryptSaveXmm( &SaveState ) == SYMCRYPT_NO_ERROR ) + { + maxParallel = 4; + scError = 
SymCryptParallelHashProcess( SymCryptParallelSha256Algorithm, + pStates, + nStates, + pOperations, + nOperations, + pbScratch, + cbScratch, + maxParallel ); + SymCryptRestoreXmm( &SaveState ); + } else { + scError = SymCryptParallelHashProcess_serial( SymCryptParallelSha256Algorithm, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch ); + } + +#elif SYMCRYPT_CPU_ARM + maxParallel = MAX_PARALLEL; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON ) ) + { + scError = SymCryptParallelHashProcess( SymCryptParallelSha256Algorithm, + pStates, + nStates, + pOperations, + nOperations, + pbScratch, + cbScratch, + maxParallel ); + } else { + scError = SymCryptParallelHashProcess_serial( SymCryptParallelSha256Algorithm, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch ); + } +#else + scError = SymCryptParallelHashProcess_serial( SymCryptParallelSha256Algorithm, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch ); +#endif + return scError; +} + + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 +// +// Code that uses the XMM registers. 
+// + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("ssse3"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("ssse3") +#endif + +#define MAJXMM( x, y, z ) _mm_or_si128( _mm_and_si128( _mm_or_si128( z, y ), x ), _mm_and_si128( z, y )) +#define CHXMM( x, y, z ) _mm_xor_si128( _mm_and_si128( _mm_xor_si128( z, y ), x ), z ) + +#define CSIGMA0XMM( x ) \ + _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \ + _mm_slli_epi32(x,30) , _mm_srli_epi32(x, 2) ),\ + _mm_slli_epi32(x,19) ), _mm_srli_epi32(x, 13) ),\ + _mm_slli_epi32(x,10) ), _mm_srli_epi32(x, 22) ) +#define CSIGMA1XMM( x ) \ + _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \ + _mm_slli_epi32(x,26) , _mm_srli_epi32(x, 6) ),\ + _mm_slli_epi32(x,21) ), _mm_srli_epi32(x, 11) ),\ + _mm_slli_epi32(x,7) ), _mm_srli_epi32(x, 25) ) +#define LSIGMA0XMM( x ) \ + _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \ + _mm_slli_epi32(x,25) , _mm_srli_epi32(x, 7) ),\ + _mm_slli_epi32(x,14) ), _mm_srli_epi32(x, 18) ),\ + _mm_srli_epi32(x, 3) ) +#define LSIGMA1XMM( x ) \ + _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \ + _mm_slli_epi32(x,15) , _mm_srli_epi32(x, 17) ),\ + _mm_slli_epi32(x,13) ), _mm_srli_epi32(x, 19) ),\ + _mm_srli_epi32(x,10) ) + +// +// Transpose macro, convert S0..S3 into R0..R3; R0 is the lane 0, R3 is lane 3. 
+// S0 = S00, S01, S02, S03; S1 = S10, S11, S12, S13; S2 = S20, S21, S22, S23; S3 = S30, S31, S32, S33 +// T0 = S00, S10, S01, S11; T1 = S02, S12, S03, S13; T2 = S20, S30, S21, S31; T3 = S22, S32, S23, S33 +// R0 = S00, S10, S20, S30; R1 = S01, S11, S21, S31; R2 = S02, S12, S22, S32; R3 = S03, S13, S23, S33 +// +#define XMM_TRANSPOSE_32( _R0, _R1, _R2, _R3, _S0, _S1, _S2, _S3 ) \ + {\ + __m128i _T0, _T1, _T2, _T3;\ + _T0 = _mm_unpacklo_epi32( _S0, _S1 ); _T1 = _mm_unpackhi_epi32( _S0, _S1 );\ + _T2 = _mm_unpacklo_epi32( _S2, _S3 ); _T3 = _mm_unpackhi_epi32( _S2, _S3 );\ + _R0 = _mm_unpacklo_epi64( _T0, _T2 ); _R1 = _mm_unpackhi_epi64( _T0, _T2 );\ + _R2 = _mm_unpacklo_epi64( _T1, _T3 ); _R3 = _mm_unpackhi_epi64( _T1, _T3 );\ + } + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256AppendBlocks_xmm( + _Inout_updates_( 4 ) PSYMCRYPT_SHA256_CHAINING_STATE * pChain, + _Inout_updates_( 4 ) PCBYTE * ppByte, + SIZE_T nBytes, + _Out_writes_( PAR_SCRATCH_ELEMENTS_256 ) __m128i * pScratch ) +{ + // + // Implementation that uses 4 lanes in the XMM registers + // + __m128i * buf = pScratch; // chaining state concatenated with the expanded input block + __m128i * W = &buf[4 + 8]; // W are the 64 words of the expanded input + __m128i * ha = &buf[4]; // initial state words, in order h, g, ..., b, a + __m128i A, B, C, D, T; + __m128i T0, T1, T2, T3; + const __m128i BYTE_REVERSE_32 = _mm_set_epi8( 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 ); + int r; + + // + // The chaining state can be unaligned on x86, so we use unaligned loads + // + + T0 = _mm_loadu_si128( (__m128i *)&pChain[0]->H[0] ); + T1 = _mm_loadu_si128( (__m128i *)&pChain[1]->H[0] ); + T2 = _mm_loadu_si128( (__m128i *)&pChain[2]->H[0] ); + T3 = _mm_loadu_si128( (__m128i *)&pChain[3]->H[0] ); + + XMM_TRANSPOSE_32( ha[7], ha[6], ha[5], ha[4], T0, T1, T2, T3 ); + + T0 = _mm_loadu_si128( (__m128i *)&pChain[0]->H[4] ); + T1 = _mm_loadu_si128( (__m128i *)&pChain[1]->H[4] ); + T2 = _mm_loadu_si128( (__m128i 
*)&pChain[2]->H[4] ); + T3 = _mm_loadu_si128( (__m128i *)&pChain[3]->H[4] ); + + XMM_TRANSPOSE_32( ha[3], ha[2], ha[1], ha[0], T0, T1, T2, T3 ); + + buf[0] = ha[4]; + buf[1] = ha[5]; + buf[2] = ha[6]; + buf[3] = ha[7]; + + while( nBytes >= 64 ) + { + + // + // Capture the input into W[0..15] + // + for( r=0; r<16; r += 4 ) + { + T0 = _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *) ppByte[0] ), BYTE_REVERSE_32 ); ppByte[0] += 16; + T1 = _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *) ppByte[1] ), BYTE_REVERSE_32 ); ppByte[1] += 16; + T2 = _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *) ppByte[2] ), BYTE_REVERSE_32 ); ppByte[2] += 16; + T3 = _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *) ppByte[3] ), BYTE_REVERSE_32 ); ppByte[3] += 16; + + XMM_TRANSPOSE_32( W[r], W[r+1], W[r+2], W[r+3], T0, T1, T2, T3 ); + } + + // + // Expand the message + // + A = W[15]; + B = W[14]; + D = W[0]; + for( r=16; r<64; r+= 2 ) + { + // Loop invariant: A=W[r-1], B = W[r-2], D = W[r-16] + + // + // Macro for one word of message expansion. + // Invariant: + // on entry: a = W[r-1], b = W[r-2], d = W[r-16] + // on exit: W[r] computed, a = W[r-1], b = W[r], c = W[r-15] + // + #define EXPAND( a, b, c, d, r ) \ + c = W[r-15]; \ + b = _mm_add_epi32( _mm_add_epi32( _mm_add_epi32( d, LSIGMA1XMM( b ) ), W[r-7] ), LSIGMA0XMM( c ) ); \ + W[r] = b; \ + + EXPAND( A, B, C, D, r ); + EXPAND( B, A, D, C, (r+1)); + + #undef EXPAND + } + + A = ha[7]; + B = ha[6]; + C = ha[5]; + D = ha[4]; + + for( r=0; r<64; r += 4 ) + { + // + // Loop invariant: + // A, B, C, and D are the a,b,c,d values of the current state. + // W[r] is the next expanded message word to be processed. + // W[r-8 .. r-5] contain the current state words h, g, f, e. 
+ // + + // + // Macro to compute one round + // + #define DO_ROUND( a, b, c, d, t, r ) \ + t = W[r]; \ + t = _mm_add_epi32( t, CSIGMA1XMM( W[r-5] ) ); \ + t = _mm_add_epi32( t, W[r-8] ); \ + t = _mm_add_epi32( t, CHXMM( W[r-5], W[r-6], W[r-7] ) ); \ + t = _mm_add_epi32( t, _mm_set1_epi32( SymCryptSha256K[r] )); \ + W[r-4] = _mm_add_epi32( t, d ); \ + d = _mm_add_epi32( t, CSIGMA0XMM( a ) ); \ + d = _mm_add_epi32( d, MAJXMM( c, b, a ) ); + + DO_ROUND( A, B, C, D, T, r ); + DO_ROUND( D, A, B, C, T, (r+1) ); + DO_ROUND( C, D, A, B, T, (r+2) ); + DO_ROUND( B, C, D, A, T, (r+3) ); + #undef DO_ROUND + } + + buf[3] = ha[7] = _mm_add_epi32( buf[3], A ); + buf[2] = ha[6] = _mm_add_epi32( buf[2], B ); + buf[1] = ha[5] = _mm_add_epi32( buf[1], C ); + buf[0] = ha[4] = _mm_add_epi32( buf[0], D ); + ha[3] = _mm_add_epi32( ha[3], W[r-5] ); + ha[2] = _mm_add_epi32( ha[2], W[r-6] ); + ha[1] = _mm_add_epi32( ha[1], W[r-7] ); + ha[0] = _mm_add_epi32( ha[0], W[r-8] ); + + nBytes -= 64; + } + + // + // Copy the chaining state back into the hash structure + // + XMM_TRANSPOSE_32( T0, T1, T2, T3, ha[7], ha[6], ha[5], ha[4] ); + _mm_storeu_si128( (__m128i *)&pChain[0]->H[0], T0 ); + _mm_storeu_si128( (__m128i *)&pChain[1]->H[0], T1 ); + _mm_storeu_si128( (__m128i *)&pChain[2]->H[0], T2 ); + _mm_storeu_si128( (__m128i *)&pChain[3]->H[0], T3 ); + + XMM_TRANSPOSE_32( T0, T1, T2, T3, ha[3], ha[2], ha[1], ha[0] ); + _mm_storeu_si128( (__m128i *)&pChain[0]->H[4], T0 ); + _mm_storeu_si128( (__m128i *)&pChain[1]->H[4], T1 ); + _mm_storeu_si128( (__m128i *)&pChain[2]->H[4], T2 ); + _mm_storeu_si128( (__m128i *)&pChain[3]->H[4], T3 ); + +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // CPU_X86_X64 + +#if SYMCRYPT_CPU_ARM +// +// Code that uses the Neon registers. 
+// + +#define MAJ( x, y, z ) vorrq_u32( vandq_u32( vorrq_u32( z, y ), x ), vandq_u32( z, y )) +#define CH( x, y, z ) veorq_u32( vandq_u32( veorq_u32( z, y ), x ), z ) + +#define CSIGMA0( x ) \ + veorq_u32( veorq_u32( veorq_u32( veorq_u32( veorq_u32( \ + vshlq_n_u32(x,30) , vshrq_n_u32(x, 2) ),\ + vshlq_n_u32(x,19) ), vshrq_n_u32(x, 13) ),\ + vshlq_n_u32(x,10) ), vshrq_n_u32(x, 22) ) +#define CSIGMA1( x ) \ + veorq_u32( veorq_u32( veorq_u32( veorq_u32( veorq_u32( \ + vshlq_n_u32(x,26) , vshrq_n_u32(x, 6) ),\ + vshlq_n_u32(x,21) ), vshrq_n_u32(x, 11) ),\ + vshlq_n_u32(x,7) ), vshrq_n_u32(x, 25) ) +#define LSIGMA0( x ) \ + veorq_u32( veorq_u32( veorq_u32( veorq_u32( \ + vshlq_n_u32(x,25) , vshrq_n_u32(x, 7) ),\ + vshlq_n_u32(x,14) ), vshrq_n_u32(x, 18) ),\ + vshrq_n_u32(x, 3) ) +#define LSIGMA1( x ) \ + veorq_u32( veorq_u32( veorq_u32( veorq_u32( \ + vshlq_n_u32(x,15) , vshrq_n_u32(x, 17) ),\ + vshlq_n_u32(x,13) ), vshrq_n_u32(x, 19) ),\ + vshrq_n_u32(x,10) ) + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256AppendBlocks_neon( + _Inout_updates_( 4 ) PSYMCRYPT_SHA256_CHAINING_STATE * pChain, + _Inout_updates_( 4 ) PCBYTE * ppByte, + SIZE_T nBytes, + _Out_writes_( PAR_SCRATCH_ELEMENTS_256 ) __n128 * pScratch ) +{ + // + // Implementation that uses 4 lanes in the Neon registers + // + __n128 * buf = pScratch; // chaining state concatenated with the expanded input block + __n128 * W = &buf[4 + 8]; // W are the 64 words of the expanded input + __n128 * ha = &buf[4]; // initial state words, in order h, g, ..., b, a + __n128 A, B, C, D, T; + __n128 T0; + int r; + + // + // This can probably be done faster, but we are missing the VTRN.64 instruction + // which makes it hard to do this efficiently in intrinsics. 
+ // + ha[7] = vsetq_lane_u32( pChain[0]->H[0], ha[7], 0 ); + ha[7] = vsetq_lane_u32( pChain[1]->H[0], ha[7], 1 ); + ha[7] = vsetq_lane_u32( pChain[2]->H[0], ha[7], 2 ); + ha[7] = vsetq_lane_u32( pChain[3]->H[0], ha[7], 3 ); + + ha[6] = vsetq_lane_u32( pChain[0]->H[1], ha[6], 0 ); + ha[6] = vsetq_lane_u32( pChain[1]->H[1], ha[6], 1 ); + ha[6] = vsetq_lane_u32( pChain[2]->H[1], ha[6], 2 ); + ha[6] = vsetq_lane_u32( pChain[3]->H[1], ha[6], 3 ); + + ha[5] = vsetq_lane_u32( pChain[0]->H[2], ha[5], 0 ); + ha[5] = vsetq_lane_u32( pChain[1]->H[2], ha[5], 1 ); + ha[5] = vsetq_lane_u32( pChain[2]->H[2], ha[5], 2 ); + ha[5] = vsetq_lane_u32( pChain[3]->H[2], ha[5], 3 ); + + ha[4] = vsetq_lane_u32( pChain[0]->H[3], ha[4], 0 ); + ha[4] = vsetq_lane_u32( pChain[1]->H[3], ha[4], 1 ); + ha[4] = vsetq_lane_u32( pChain[2]->H[3], ha[4], 2 ); + ha[4] = vsetq_lane_u32( pChain[3]->H[3], ha[4], 3 ); + + ha[3] = vsetq_lane_u32( pChain[0]->H[4], ha[3], 0 ); + ha[3] = vsetq_lane_u32( pChain[1]->H[4], ha[3], 1 ); + ha[3] = vsetq_lane_u32( pChain[2]->H[4], ha[3], 2 ); + ha[3] = vsetq_lane_u32( pChain[3]->H[4], ha[3], 3 ); + + ha[2] = vsetq_lane_u32( pChain[0]->H[5], ha[2], 0 ); + ha[2] = vsetq_lane_u32( pChain[1]->H[5], ha[2], 1 ); + ha[2] = vsetq_lane_u32( pChain[2]->H[5], ha[2], 2 ); + ha[2] = vsetq_lane_u32( pChain[3]->H[5], ha[2], 3 ); + + ha[1] = vsetq_lane_u32( pChain[0]->H[6], ha[1], 0 ); + ha[1] = vsetq_lane_u32( pChain[1]->H[6], ha[1], 1 ); + ha[1] = vsetq_lane_u32( pChain[2]->H[6], ha[1], 2 ); + ha[1] = vsetq_lane_u32( pChain[3]->H[6], ha[1], 3 ); + + ha[0] = vsetq_lane_u32( pChain[0]->H[7], ha[0], 0 ); + ha[0] = vsetq_lane_u32( pChain[1]->H[7], ha[0], 1 ); + ha[0] = vsetq_lane_u32( pChain[2]->H[7], ha[0], 2 ); + ha[0] = vsetq_lane_u32( pChain[3]->H[7], ha[0], 3 ); + + buf[0] = ha[4]; + buf[1] = ha[5]; + buf[2] = ha[6]; + buf[3] = ha[7]; + + while( nBytes >= 64 ) + { + + // + // Capture the input into W[0..15] + // + for( r=0; r<16; r ++ ) + { + T0 = vsetq_lane_u32( 
SYMCRYPT_LOAD_MSBFIRST32( ppByte[0] ), T0, 0 ); ppByte[0] += 4; + T0 = vsetq_lane_u32( SYMCRYPT_LOAD_MSBFIRST32( ppByte[1] ), T0, 1 ); ppByte[1] += 4; + T0 = vsetq_lane_u32( SYMCRYPT_LOAD_MSBFIRST32( ppByte[2] ), T0, 2 ); ppByte[2] += 4; + T0 = vsetq_lane_u32( SYMCRYPT_LOAD_MSBFIRST32( ppByte[3] ), T0, 3 ); ppByte[3] += 4; + W[r] = T0; + } + + // + // Expand the message + // + A = W[15]; + B = W[14]; + D = W[0]; + for( r=16; r<64; r+= 2 ) + { + // Loop invariant: A=W[r-1], B = W[r-2], D = W[r-16] + + // + // Macro for one word of message expansion. + // Invariant: + // on entry: a = W[r-1], b = W[r-2], d = W[r-16] + // on exit: W[r] computed, a = W[r-1], b = W[r], c = W[r-15] + // + #define EXPAND( a, b, c, d, r ) \ + c = W[r-15]; \ + b = vaddq_u32( vaddq_u32( vaddq_u32( d, LSIGMA1( b ) ), W[r-7] ), LSIGMA0( c ) ); \ + W[r] = b; \ + + EXPAND( A, B, C, D, r ); + EXPAND( B, A, D, C, (r+1)); + + #undef EXPAND + } + + A = ha[7]; + B = ha[6]; + C = ha[5]; + D = ha[4]; + + for( r=0; r<64; r += 4 ) + { + // + // Loop invariant: + // A, B, C, and D are the a,b,c,d values of the current state. + // W[r] is the next expanded message word to be processed. + // W[r-8 .. r-5] contain the current state words h, g, f, e. 
+ // + + // + // Macro to compute one round + // + #define DO_ROUND( a, b, c, d, t, r ) \ + t = W[r]; \ + t = vaddq_u32( t, CSIGMA1( W[r-5] ) ); \ + t = vaddq_u32( t, W[r-8] ); \ + t = vaddq_u32( t, CH( W[r-5], W[r-6], W[r-7] ) ); \ + t = vaddq_u32( t, vdupq_n_u32( SymCryptSha256K[r] )); \ + W[r-4] = vaddq_u32( t, d ); \ + d = vaddq_u32( t, CSIGMA0( a ) ); \ + d = vaddq_u32( d, MAJ( c, b, a ) ); + + DO_ROUND( A, B, C, D, T, r ); + DO_ROUND( D, A, B, C, T, (r+1) ); + DO_ROUND( C, D, A, B, T, (r+2) ); + DO_ROUND( B, C, D, A, T, (r+3) ); + #undef DO_ROUND + } + + buf[3] = ha[7] = vaddq_u32( buf[3], A ); + buf[2] = ha[6] = vaddq_u32( buf[2], B ); + buf[1] = ha[5] = vaddq_u32( buf[1], C ); + buf[0] = ha[4] = vaddq_u32( buf[0], D ); + ha[3] = vaddq_u32( ha[3], W[r-5] ); + ha[2] = vaddq_u32( ha[2], W[r-6] ); + ha[1] = vaddq_u32( ha[1], W[r-7] ); + ha[0] = vaddq_u32( ha[0], W[r-8] ); + + nBytes -= 64; + } + + // + // Copy the chaining state back into the hash structure + // + pChain[0]->H[0] = vgetq_lane_u32( ha[7], 0 ); + pChain[1]->H[0] = vgetq_lane_u32( ha[7], 1 ); + pChain[2]->H[0] = vgetq_lane_u32( ha[7], 2 ); + pChain[3]->H[0] = vgetq_lane_u32( ha[7], 3 ); + + pChain[0]->H[1] = vgetq_lane_u32( ha[6], 0 ); + pChain[1]->H[1] = vgetq_lane_u32( ha[6], 1 ); + pChain[2]->H[1] = vgetq_lane_u32( ha[6], 2 ); + pChain[3]->H[1] = vgetq_lane_u32( ha[6], 3 ); + + pChain[0]->H[2] = vgetq_lane_u32( ha[5], 0 ); + pChain[1]->H[2] = vgetq_lane_u32( ha[5], 1 ); + pChain[2]->H[2] = vgetq_lane_u32( ha[5], 2 ); + pChain[3]->H[2] = vgetq_lane_u32( ha[5], 3 ); + + pChain[0]->H[3] = vgetq_lane_u32( ha[4], 0 ); + pChain[1]->H[3] = vgetq_lane_u32( ha[4], 1 ); + pChain[2]->H[3] = vgetq_lane_u32( ha[4], 2 ); + pChain[3]->H[3] = vgetq_lane_u32( ha[4], 3 ); + + pChain[0]->H[4] = vgetq_lane_u32( ha[3], 0 ); + pChain[1]->H[4] = vgetq_lane_u32( ha[3], 1 ); + pChain[2]->H[4] = vgetq_lane_u32( ha[3], 2 ); + pChain[3]->H[4] = vgetq_lane_u32( ha[3], 3 ); + + pChain[0]->H[5] = vgetq_lane_u32( ha[2], 0 ); 
+ pChain[1]->H[5] = vgetq_lane_u32( ha[2], 1 ); + pChain[2]->H[5] = vgetq_lane_u32( ha[2], 2 ); + pChain[3]->H[5] = vgetq_lane_u32( ha[2], 3 ); + + pChain[0]->H[6] = vgetq_lane_u32( ha[1], 0 ); + pChain[1]->H[6] = vgetq_lane_u32( ha[1], 1 ); + pChain[2]->H[6] = vgetq_lane_u32( ha[1], 2 ); + pChain[3]->H[6] = vgetq_lane_u32( ha[1], 3 ); + + pChain[0]->H[7] = vgetq_lane_u32( ha[0], 0 ); + pChain[1]->H[7] = vgetq_lane_u32( ha[0], 1 ); + pChain[2]->H[7] = vgetq_lane_u32( ha[0], 2 ); + pChain[3]->H[7] = vgetq_lane_u32( ha[0], 3 ); + + SymCryptWipeKnownSize( buf, PAR_SCRATCH_ELEMENTS_256 * sizeof( *buf ) ); // wipe the whole scratch area; sizeof( buf ) is only the size of the pointer +} + +#undef CH +#undef MAJ +#undef CSIGMA0 +#undef CSIGMA1 +#undef LSIGMA0 +#undef LSIGMA1 + +#endif // CPU_ARM + + + +#if SYMCRYPT_CPU_X86 || SYMCRYPT_CPU_AMD64 || SYMCRYPT_CPU_ARM + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256AppendBytes_serial( + _Inout_updates_( nPar ) PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pWork, + _In_range_(1, MAX_PARALLEL) SIZE_T nPar, + SIZE_T nBytes ) +{ + SIZE_T i; + SIZE_T tmp; + + SYMCRYPT_ASSERT( nBytes % SYMCRYPT_SHA256_INPUT_BLOCK_SIZE == 0 ); + SYMCRYPT_ASSERT( nPar >= 1 && nPar <= MAX_PARALLEL ); + + for( i=0; i < nPar; i++ ) + { + SYMCRYPT_ASSERT( pWork[i]->cbData >= nBytes ); + SymCryptSha256AppendBlocks( & ((PSYMCRYPT_SHA256_STATE)(pWork[i]->hashState))->chain, pWork[i]->pbData, nBytes, &tmp ); + pWork[i]->pbData += nBytes; + pWork[i]->cbData -= nBytes; + } + return; +} + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256Append( + _Inout_updates_( nPar ) PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pWork, + _In_range_(1, MAX_PARALLEL) SIZE_T nPar, + SIZE_T nBytes, + _Out_writes_to_( SYMCRYPT_SIMD_ELEMENT_SIZE * PAR_SCRATCH_ELEMENTS_256, 0 ) + PBYTE pbSimdScratch, + SIZE_T cbSimdScratch ) +{ + PSYMCRYPT_SHA256_CHAINING_STATE apChain[MAX_PARALLEL]; + PCBYTE apData[MAX_PARALLEL]; + SIZE_T i; + UINT32 maxParallel; + + UNREFERENCED_PARAMETER( cbSimdScratch ); // not referenced on FRE builds + SYMCRYPT_ASSERT( cbSimdScratch >= PAR_SCRATCH_ELEMENTS_256 * 
SYMCRYPT_SIMD_ELEMENT_SIZE ); + SYMCRYPT_ASSERT( ((SIZE_T)pbSimdScratch & (SYMCRYPT_SIMD_ELEMENT_SIZE - 1)) == 0 ); + + // + // Compute maxParallel; this is 4 if nPar <= 4, and 8 if nPar = 5, ..., 8. + // This is how many parameter sets we have to set up. + // +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + + maxParallel = (nPar + 3) & ~3; + SYMCRYPT_ASSERT( maxParallel == 4 || (maxParallel == 8 && SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_AVX2 )) ); + +#elif SYMCRYPT_CPU_ARM + + maxParallel = 4; + +#endif + + SYMCRYPT_ASSERT( nPar >= 1 && nPar <= maxParallel ); + + if( nPar < MIN_PARALLEL ) + { + SymCryptParallelSha256AppendBytes_serial( pWork, nPar, nBytes ); + + // Done with this function. + goto cleanup; + } + + // + // Our parallel code expects exactly four or eight parallel computations. + // We simply duplicate the first one if we get fewer parallel ones. + // That means we write the result multiple times, but it saves a lot of + // extra if()s in the main codeline. + // + + i = 0; + while( i < nPar ) + { + SYMCRYPT_ASSERT( pWork[i]->cbData >= nBytes ); + apChain[i] = & ((PSYMCRYPT_SHA256_STATE)(pWork[i]->hashState))->chain; + apData[i] = pWork[i]->pbData; + pWork[i]->pbData += nBytes; + pWork[i]->cbData -= nBytes; + i++; + } + + while( i < maxParallel ) + { + apChain[i] = apChain[0]; + apData[i] = apData[0]; + i++; + } + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + if( maxParallel == 8 ) + { + SymCryptParallelSha256AppendBlocks_ymm( &apChain[0], &apData[0], nBytes, (PBYTE)((__m256i *)pbSimdScratch) ); + } else { + SymCryptParallelSha256AppendBlocks_xmm( &apChain[0], &apData[0], nBytes, (__m128i *)pbSimdScratch ); + } +#elif SYMCRYPT_CPU_ARM + SymCryptParallelSha256AppendBlocks_neon( &apChain[0], &apData[0], nBytes, (__n128 *) pbSimdScratch ); +#else +#error Unknown CPU +#endif + +cleanup: + ;// no cleanup at this moment. 
+} + +#endif + +/* +VOID +SYMCRYPT_CALL +SymCryptParallelSha256AppendBlocks( + _Inout_updates_( nWork ) PSYMCRYPT_PARALLEL_SHA256_STATE * pWork, + SIZE_T nWork, + SIZE_T nBytes ) +{ + SIZE_T i; + + SYMCRYPT_ASSERT( nWork >= 1 && nWork <= 4 ); + + for( i=0; i < nWork; i++ ) + { + SYMCRYPT_ASSERT( pWork[i]->cbData >= nBytes ); + SymCryptSha256AppendBlocks( &pWork[i]->internalState.hashState.chain, pWork[i]->pbData, nBytes ); + pWork[i]->pbData += nBytes; + pWork[i]->cbData -= nBytes; + } +} + +*/ + +#endif // SUPPORT_PARALLEL + +#if SUPPORT_PARALLEL + +const SYMCRYPT_PARALLEL_HASH SymCryptParallelSha256Algorithm_default = { + &SymCryptSha256Algorithm_default, + PAR_SCRATCH_ELEMENTS_256 * SYMCRYPT_SIMD_ELEMENT_SIZE, + &SymCryptParallelSha256Result1, + &SymCryptParallelSha256Result2, + &SymCryptParallelSha256ResultDone, + &SymCryptParallelSha256Append, +}; + +#else + +// +// For platforms that do not have a parallel hash implementation +// we use this structure to provide the necessary data to the _serial +// implementation of the function. 
+// +const SYMCRYPT_PARALLEL_HASH SymCryptParallelSha256Algorithm_default = { + &SymCryptSha256Algorithm_default, + PAR_SCRATCH_ELEMENTS_256 * SYMCRYPT_SIMD_ELEMENT_SIZE, + NULL, + NULL, + NULL, + NULL, +}; + +#endif + +const PCSYMCRYPT_PARALLEL_HASH SymCryptParallelSha256Algorithm = &SymCryptParallelSha256Algorithm_default; + + +#define N_SELFTEST_STATES 5 // Just enough to trigger YMM usage + +VOID +SYMCRYPT_CALL +SymCryptParallelSha256Selftest(void) +{ + SYMCRYPT_ERROR scError; + SYMCRYPT_SHA256_STATE states[N_SELFTEST_STATES]; + BYTE result[N_SELFTEST_STATES][SYMCRYPT_SHA256_RESULT_SIZE]; + SYMCRYPT_PARALLEL_HASH_OPERATION op[2*N_SELFTEST_STATES]; + BYTE scratch[SYMCRYPT_PARALLEL_SHA256_FIXED_SCRATCH + N_SELFTEST_STATES * SYMCRYPT_PARALLEL_HASH_PER_STATE_SCRATCH]; + int i; + + SymCryptParallelSha256Init( &states[0], N_SELFTEST_STATES ); + + for( i=0; i<N_SELFTEST_STATES; i++ ) + { + op[2*i ].iHash = i; + op[2*i ].hashOperation = SYMCRYPT_HASH_OPERATION_APPEND; + op[2*i ].pbBuffer = (PBYTE) SymCryptTestMsg3; + op[2*i ].cbBuffer = sizeof(SymCryptTestMsg3); + op[2*i + 1].iHash = i; + op[2*i + 1].hashOperation = SYMCRYPT_HASH_OPERATION_RESULT; + op[2*i + 1].pbBuffer = &result[i][0]; + op[2*i + 1].cbBuffer = SYMCRYPT_SHA256_RESULT_SIZE; + } + + scError = SymCryptParallelSha256Process( &states[0], N_SELFTEST_STATES, op, 2*N_SELFTEST_STATES, scratch, sizeof( scratch ) ); + if( scError != SYMCRYPT_NO_ERROR ) + { + SymCryptFatal( 'PS25' ); + } + + for( i=0; i<N_SELFTEST_STATES; i++ ) + { + SymCryptInjectError( &result[i][0], SYMCRYPT_SHA256_RESULT_SIZE ); + + if( memcmp( &result[i][0], SymCryptSha256KATAnswer, SYMCRYPT_SHA256_RESULT_SIZE ) != 0 ) { + SymCryptFatal( 'PS25' ); + } + } +} diff --git a/libs/symcrypt/lib/sha3.c b/libs/symcrypt/lib/sha3.c new file mode 100644 index 00000000000..fbbd0fe28dd --- /dev/null +++ b/libs/symcrypt/lib/sha3.c @@ -0,0 +1,619 @@ +// +// Sha3.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. 
+// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. +// + + +// +// Keccak state +// +// Keccak-f[1600] state consists of 25 64-bit words. We represent this state as a single +// dimensional array of 25 elements (Wi being the i^th element of the array for i=0..24) +// with the following mapping to two dimensional coordinates. Note that in FIPS 202 Figure 2, +// the element W0 at (x,y)=(0,0) is depicted in the middle of the 5x5 array. We set W0 +// to be the first element so that the rate part of the permutation maps to the beginning +// of the state. +// +// x=0 x=1 x=2 x=3 x=4 +// ----------------------- +// y=0 W0 W1 W2 W3 W4 +// y=1 W5 W6 W7 W8 W9 +// y=2 W10 W11 W12 W13 W14 +// y=3 W15 W16 W17 W18 W19 +// y=4 W20 W21 W22 W23 W24 + + + +// Rotation constants for Keccak Rho transformation +static const UINT8 KeccakRhoK[25] = { + 0, 1, 62, 28, 27, // y = 0 + 36, 44, 6, 55, 20, // y = 1 + 3, 10, 43, 25, 39, // y = 2 + 41, 45, 15, 21, 8, // y = 3 + 18, 2, 61, 56, 14, // y = 4 +}; + +// Keccak round constants +static UINT64 KeccakIotaK[24] = { + 0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL, 0x8000000080008000ULL, + 0x000000000000808bULL, 0x0000000080000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL, + 0x000000000000008aULL, 0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL, + 0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL, + 0x8000000000008002ULL, 0x8000000000000080ULL, 0x000000000000800aULL, 0x800000008000000aULL, + 0x8000000080008081ULL, 0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL +}; + +// XOR sum of column c of the state +#define KECCAK_COLUMN_SUM(state, c) \ + (state[0 + (c)] ^ state[5 + (c)] ^ state[10 + (c)] ^ state[15 + (c)] ^ state[20 + (c)]) + +// XOR w to all the lanes in column c of the state +// +// Note: The expression to be XORed is copied to a temporary variable to avoid 
// reevaluation
//
// The body is wrapped in do { } while (0) so that the macro behaves as a single
// statement, including in an unbraced if/else.
#define KECCAK_COLUMN_UPDATE(state, c, w) do { \
    UINT64 keccakColumnXor = (w); \
    (state)[ 0 + (c)] ^= keccakColumnXor; \
    (state)[ 5 + (c)] ^= keccakColumnXor; \
    (state)[10 + (c)] ^= keccakColumnXor; \
    (state)[15 + (c)] ^= keccakColumnXor; \
    (state)[20 + (c)] ^= keccakColumnXor; \
} while (0)

// Apply Theta transformation to the state
//
// All five column sums are taken first; column x is then XORed with the sum of
// column x-1 and the sum of column x+1 rotated left by one bit (indices mod 5).
#define KECCAK_THETA(state) do { \
    UINT64 colSum[5]; \
    colSum[0] = KECCAK_COLUMN_SUM(state, 0); \
    colSum[1] = KECCAK_COLUMN_SUM(state, 1); \
    colSum[2] = KECCAK_COLUMN_SUM(state, 2); \
    colSum[3] = KECCAK_COLUMN_SUM(state, 3); \
    colSum[4] = KECCAK_COLUMN_SUM(state, 4); \
    KECCAK_COLUMN_UPDATE(state, 0, colSum[4] ^ ROL64(colSum[1], 1)); \
    KECCAK_COLUMN_UPDATE(state, 1, colSum[0] ^ ROL64(colSum[2], 1)); \
    KECCAK_COLUMN_UPDATE(state, 2, colSum[1] ^ ROL64(colSum[3], 1)); \
    KECCAK_COLUMN_UPDATE(state, 3, colSum[2] ^ ROL64(colSum[4], 1)); \
    KECCAK_COLUMN_UPDATE(state, 4, colSum[3] ^ ROL64(colSum[0], 1)); \
} while (0)

// Apply Rho transformation to row r of the state
//
// Each lane of the row is rotated left by its entry in KeccakRhoK.
#define KECCAK_RHO_ROW(state, r) do { \
    (state)[5 * (r) + 0] = ROL64((state)[5 * (r) + 0], KeccakRhoK[5 * (r) + 0]); \
    (state)[5 * (r) + 1] = ROL64((state)[5 * (r) + 1], KeccakRhoK[5 * (r) + 1]); \
    (state)[5 * (r) + 2] = ROL64((state)[5 * (r) + 2], KeccakRhoK[5 * (r) + 2]); \
    (state)[5 * (r) + 3] = ROL64((state)[5 * (r) + 3], KeccakRhoK[5 * (r) + 3]); \
    (state)[5 * (r) + 4] = ROL64((state)[5 * (r) + 4], KeccakRhoK[5 * (r) + 4]); \
} while (0)

// Apply Rho transformation to row 0 of the state
//
// The first row contains a rotation by 0 on the first lane that uses a shift
// by 64 which we want to avoid. Rho operation below omits the rotation on the first lane.
+#define KECCAK_RHO_ROW0(state) { \ + state[1] = ROL64(state[1], KeccakRhoK[1]); \ + state[2] = ROL64(state[2], KeccakRhoK[2]); \ + state[3] = ROL64(state[3], KeccakRhoK[3]); \ + state[4] = ROL64(state[4], KeccakRhoK[4]); \ +} + +// Apply Rho transformation to the state +#define KECCAK_RHO(state) { \ + KECCAK_RHO_ROW0(state); \ + KECCAK_RHO_ROW(state, 1); \ + KECCAK_RHO_ROW(state, 2); \ + KECCAK_RHO_ROW(state, 3); \ + KECCAK_RHO_ROW(state, 4); \ +} + +// Apply Pi transformation to the state +#define KECCAK_PI(state) { \ + UINT64 t = state[ 1]; state[ 1] = state[ 6]; state[ 6] = state[ 9]; state[ 9] = state[22]; state[22] = state[14]; \ + state[14] = state[20]; state[20] = state[ 2]; state[ 2] = state[12]; state[12] = state[13]; state[13] = state[19]; \ + state[19] = state[23]; state[23] = state[15]; state[15] = state[ 4]; state[ 4] = state[24]; state[24] = state[21]; \ + state[21] = state[ 8]; state[ 8] = state[16]; state[16] = state[ 5]; state[ 5] = state[ 3]; state[ 3] = state[18]; \ + state[18] = state[17]; state[17] = state[11]; state[11] = state[ 7]; state[ 7] = state[10]; state[10] = t; \ +} + +// Apply Chi transformation on row r of state +#define KECCAK_CHI_ROW(state, r) { \ + UINT64 t1 = state[5 * (r) + 0] ^ (~state[5 * (r) + 1] & state[5 * (r) + 2]); \ + UINT64 t2 = state[5 * (r) + 1] ^ (~state[5 * (r) + 2] & state[5 * (r) + 3]); \ + state[5 * (r) + 2] = state[5 * (r) + 2] ^ (~state[5 * (r) + 3] & state[5 * (r) + 4]); \ + state[5 * (r) + 3] = state[5 * (r) + 3] ^ (~state[5 * (r) + 4] & state[5 * (r) + 0]); \ + state[5 * (r) + 4] = state[5 * (r) + 4] ^ (~state[5 * (r) + 0] & state[5 * (r) + 1]); \ + state[5 * (r) + 0] = t1; \ + state[5 * (r) + 1] = t2; \ +} + +// Apply Chi transformation to state +#define KECCAK_CHI(state) { \ + KECCAK_CHI_ROW(state, 0); \ + KECCAK_CHI_ROW(state, 1); \ + KECCAK_CHI_ROW(state, 2); \ + KECCAK_CHI_ROW(state, 3); \ + KECCAK_CHI_ROW(state, 4); \ +} + +// Add round constant to state +#define KECCAK_IOTA(state, rnd) state[0] ^= 
KeccakIotaK[rnd] + +// Perform one round of Keccak permutation on state +#define KECCAK_PERM_ROUND(state, rnd) { \ + KECCAK_THETA(state); \ + KECCAK_RHO(state); \ + KECCAK_PI(state); \ + KECCAK_CHI(state); \ + KECCAK_IOTA(state, rnd); \ +} + + +// +// SymCryptKeccakPermute +// +VOID +SYMCRYPT_CALL +SymCryptKeccakPermute(_Inout_updates_(25) UINT64* pState) +{ + for (int r = 0; r < 24; r++) + { + KECCAK_PERM_ROUND(pState, r); + } +} + + +// +// SymCryptKeccakInit +// +VOID +SYMCRYPT_CALL +SymCryptKeccakInit(_Out_ PSYMCRYPT_KECCAK_STATE pState, UINT32 inputBlockSize, UINT8 paddingValue) +{ + pState->inputBlockSize = inputBlockSize; + pState->paddingValue = paddingValue; + + // Initialize the Keccak permutation state and set mutable state variables + // to their default values. + SymCryptKeccakReset(pState); +} + +VOID +SYMCRYPT_CALL +SymCryptKeccakReset(_Out_ PSYMCRYPT_KECCAK_STATE pState) +{ + // + // Wipe & re-initialize + // + // Wipe the Keccak permutation state and set the mutable state variables to their + // default values. Non-mutable state variables retain their values. State becomes + // re-initialized after this call. 
+ SymCryptWipeKnownSize(pState->state, sizeof(pState->state)); + pState->stateIndex = 0; + pState->squeezeMode = FALSE; +} + +// +// SymCryptKeccakAppendByte +// +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptKeccakAppendByte(_Inout_ PSYMCRYPT_KECCAK_STATE pState, BYTE val) +{ + SYMCRYPT_ASSERT(!pState->squeezeMode); + SYMCRYPT_ASSERT(pState->stateIndex < pState->inputBlockSize); + + pState->state[pState->stateIndex / sizeof(UINT64)] ^= ((UINT64)val << (8 * (pState->stateIndex % 8))); + pState->stateIndex++; +} + +// +// SymCryptKeccakAppendBytes +// +FORCEINLINE +VOID +SYMCRYPT_CALL +SymCryptKeccakAppendBytes(_Inout_ PSYMCRYPT_KECCAK_STATE pState, PCBYTE pbBuffer, SIZE_T cbBuffer) +{ + SYMCRYPT_ASSERT(!pState->squeezeMode); + SYMCRYPT_ASSERT((pState->stateIndex + cbBuffer) <= pState->inputBlockSize); + + for (SIZE_T i = 0; i < cbBuffer; i++) + { + pState->state[(pState->stateIndex + i) / sizeof(UINT64)] ^= ((UINT64)pbBuffer[i] << (8 * ((pState->stateIndex + i) % 8))); + } + + pState->stateIndex += (UINT32)cbBuffer; +} + + +// +// SymCryptKeccakAppendLanes +// +VOID +SYMCRYPT_CALL +SymCryptKeccakAppendLanes( + _Inout_ PSYMCRYPT_KECCAK_STATE pState, + _In_reads_(uLaneCount * sizeof(UINT64)) PCBYTE pbData, + SIZE_T uLaneCount) +{ + SYMCRYPT_ASSERT(!pState->squeezeMode); + SYMCRYPT_ASSERT((pState->inputBlockSize & 0x7) == 0); + SYMCRYPT_ASSERT((pState->stateIndex & 0x7) == 0); + SYMCRYPT_ASSERT(pState->stateIndex != pState->inputBlockSize); + + // Locate the lane in the state for next append. + // Currently, pState->stateIndex/sizeof(UINT64) of the lanes are used. 
+ UINT32 uLaneIndex = pState->stateIndex / sizeof(UINT64); + + for (SIZE_T i = 0; i < uLaneCount; i++) + { + pState->state[uLaneIndex] ^= SYMCRYPT_LOAD_LSBFIRST64(pbData + i * sizeof(UINT64)); + pState->stateIndex += sizeof(UINT64); + uLaneIndex++; + + if (pState->stateIndex == pState->inputBlockSize) + { + SymCryptKeccakPermute(pState->state); + pState->stateIndex = 0; + uLaneIndex = 0; + } + } +} + +// +// SymCryptKeccakZeroAppendBlock +// +VOID +SYMCRYPT_CALL +SymCryptKeccakZeroAppendBlock(_Inout_ PSYMCRYPT_KECCAK_STATE pState) +{ + SYMCRYPT_ASSERT(!pState->squeezeMode); + SymCryptKeccakPermute(pState->state); + pState->stateIndex = 0; +} + +// +// SymCryptKeccakAppend +// +VOID +SYMCRYPT_CALL +SymCryptKeccakAppend( + _Inout_ PSYMCRYPT_KECCAK_STATE pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData) +{ + SYMCRYPT_ASSERT(pState->inputBlockSize % 8 == 0); + + // If we were in squeeze mode (Append is called after an Extract without wiping), + // switch to absorb mode to start a new hash computation. + if (pState->squeezeMode) + { + SymCryptKeccakReset(pState); + } + + SYMCRYPT_ASSERT(pState->stateIndex < pState->inputBlockSize); + + // Make pState->stateIndex a multiple of 8. + // Message block boundary will not be crossed, check + // if permutation is needed after this part. 
+ while (cbData > 0 && (pState->stateIndex & 0x7)) + { + SymCryptKeccakAppendByte(pState, *pbData); + pbData++; + cbData--; + } + + // Permute if input message block is filled + if (pState->stateIndex == pState->inputBlockSize) + { + SymCryptKeccakPermute(pState->state); + pState->stateIndex = 0; + } + + // Append full lanes + SIZE_T uFullLanes = cbData / sizeof(UINT64); + if (uFullLanes > 0) + { + SymCryptKeccakAppendLanes(pState, pbData, uFullLanes); + pbData += uFullLanes * sizeof(UINT64); + cbData -= uFullLanes * sizeof(UINT64); + } + + SYMCRYPT_ASSERT(cbData < sizeof(UINT64)); + SymCryptKeccakAppendBytes(pState, pbData, cbData); + + SYMCRYPT_ASSERT(pState->stateIndex != pState->inputBlockSize); +} + +// +// SymCryptKeccakApplyPadding +// +VOID +SYMCRYPT_CALL +SymCryptKeccakApplyPadding(_Inout_ PSYMCRYPT_KECCAK_STATE pState) +{ + SYMCRYPT_ASSERT(!pState->squeezeMode); + + // Locate the lane and byte position for the padding byte + UINT32 uLanePos = pState->stateIndex / sizeof(UINT64); + UINT32 uBytePos = pState->stateIndex % sizeof(UINT64); + pState->state[uLanePos] ^= ((UINT64)pState->paddingValue << (8 * uBytePos)); + + // Pad the final 1 bit to the msb of the last lane in the rate portion of the state + pState->state[pState->inputBlockSize / sizeof(UINT64) - 1] ^= (1ULL << 63); + + // Process the padded block and switch to squeeze mode + SymCryptKeccakPermute(pState->state); + pState->stateIndex = 0; + pState->squeezeMode = TRUE; +} + +// +// SymCryptKeccakExtractByte +// +FORCEINLINE +BYTE +SYMCRYPT_CALL +SymCryptKeccakExtractByte(_Inout_ PSYMCRYPT_KECCAK_STATE pState) +{ + SYMCRYPT_ASSERT(pState->squeezeMode); + SYMCRYPT_ASSERT(pState->stateIndex < pState->inputBlockSize); + + BYTE ret = (BYTE)((pState->state[pState->stateIndex / sizeof(UINT64)] >> (8 * (pState->stateIndex % 8))) & 0xff); + pState->stateIndex++; + return ret; +} + +// +// SymCryptKeccakExtractLanes +// +VOID +SYMCRYPT_CALL +SymCryptKeccakExtractLanes( + _Inout_ PSYMCRYPT_KECCAK_STATE 
pState, + _Out_writes_(uLaneCount * sizeof(UINT64)) PBYTE pbResult, + SIZE_T uLaneCount) +{ + SYMCRYPT_ASSERT(pState->squeezeMode); + SYMCRYPT_ASSERT((pState->inputBlockSize & 0x7) == 0); + SYMCRYPT_ASSERT((pState->stateIndex & 0x7) == 0); + + // Locate the lane in the state for next extraction + UINT32 uLaneIndex = pState->stateIndex / sizeof(UINT64); + + for (SIZE_T i = 0; i < uLaneCount; i++) + { + SYMCRYPT_ASSERT(pState->stateIndex <= pState->inputBlockSize); + + if (pState->stateIndex == pState->inputBlockSize) + { + SymCryptKeccakPermute(pState->state); + pState->stateIndex = 0; + uLaneIndex = 0; + } + + SYMCRYPT_STORE_LSBFIRST64(pbResult + i * sizeof(UINT64), pState->state[uLaneIndex]); + pState->stateIndex += sizeof(UINT64); + uLaneIndex++; + } +} + +// +// SymCryptKeccakExtract +// +VOID +SYMCRYPT_CALL +SymCryptKeccakExtract( + _Inout_ PSYMCRYPT_KECCAK_STATE pState, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult, + BOOLEAN bWipe) +{ + // Apply padding and switch to squeeze mode if this is the first call to Extract + if (!pState->squeezeMode) + { + SymCryptKeccakApplyPadding(pState); + } + + // Do the permutation if there are no bytes available in the state + if ( (cbResult > 0) && (pState->stateIndex == pState->inputBlockSize) ) + { + SymCryptKeccakPermute(pState->state); + pState->stateIndex= 0; + } + + // Make stateIndex a multiple of 8 so that the extraction can be performed in lanes. + // We don't call the permutation as soon as the stateIndex reaches inputBlockSize, + // cbResult must also be non-zero for that. This condition is checked + // in ExtractLanes or in the 'remaining bytes' block that follows it. 
+ while (cbResult > 0 && (pState->stateIndex & 0x7)) + { + *pbResult = SymCryptKeccakExtractByte(pState); + pbResult++; + cbResult--; + } + + SYMCRYPT_ASSERT((cbResult == 0) || ((pState->stateIndex & 0x7) == 0)); + + // Extract full lanes + SIZE_T uFullLanes = cbResult / sizeof(UINT64); + if (uFullLanes > 0) + { + SymCryptKeccakExtractLanes(pState, pbResult, uFullLanes); + pbResult += uFullLanes * sizeof(UINT64); + cbResult -= uFullLanes * sizeof(UINT64); + } + + // Extract the remaining bytes + SYMCRYPT_ASSERT(cbResult < sizeof(UINT64)); + while (cbResult > 0) + { + if (pState->stateIndex == pState->inputBlockSize) + { + SymCryptKeccakPermute(pState->state); + pState->stateIndex = 0; + } + + *pbResult = SymCryptKeccakExtractByte(pState); + pbResult++; + cbResult--; + } + + if (bWipe) + { + // Wipe the Keccak state and make it ready for a new hash computation + SymCryptKeccakReset(pState); + } +} + +// +// SymCryptKeccakStateExport +// +VOID +SYMCRYPT_CALL +SymCryptKeccakStateExport( + SYMCRYPT_BLOB_TYPE type, + _In_ PCSYMCRYPT_KECCAK_STATE pState, + _Out_writes_bytes_(SYMCRYPT_KECCAK_STATE_EXPORT_SIZE) PBYTE pbBlob) +{ + + SYMCRYPT_ALIGN SYMCRYPT_KECCAK_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. + C_ASSERT(sizeof(blob) == SYMCRYPT_KECCAK_STATE_EXPORT_SIZE); + + SymCryptWipeKnownSize(&blob, sizeof(blob)); // wipe to avoid any data leakage + + blob.header.magic = SYMCRYPT_BLOB_MAGIC; + blob.header.size = SYMCRYPT_KECCAK_STATE_EXPORT_SIZE; + blob.header.type = type; + + // + // Copy the relevant data. Buffer will be 0-padded. 
+ // + + SymCryptUint64ToLsbFirst(&pState->state[0], &blob.state[0], 25); + blob.stateIndex = pState->stateIndex; + blob.paddingValue = pState->paddingValue; + blob.squeezeMode = pState->squeezeMode; + + SYMCRYPT_ASSERT((PCBYTE)&blob + sizeof(blob) - sizeof(SYMCRYPT_BLOB_TRAILER) == (PCBYTE)&blob.trailer); + SymCryptMarvin32(SymCryptMarvin32DefaultSeed, (PCBYTE)&blob, sizeof(blob) - sizeof(SYMCRYPT_BLOB_TRAILER), &blob.trailer.checksum[0]); + + memcpy(pbBlob, &blob, sizeof(blob)); + + SymCryptWipeKnownSize(&blob, sizeof(blob)); + return; +} + + +// +// SymCryptKeccakStateImport +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptKeccakStateImport( + SYMCRYPT_BLOB_TYPE type, + _Out_ PSYMCRYPT_KECCAK_STATE pState, + _In_reads_bytes_(SYMCRYPT_KECCAK_STATE_EXPORT_SIZE) PCBYTE pbBlob) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_ALIGN SYMCRYPT_KECCAK_STATE_EXPORT_BLOB blob; // local copy to have proper alignment. + BYTE checksum[8]; + + C_ASSERT(sizeof(blob) == SYMCRYPT_KECCAK_STATE_EXPORT_SIZE); + memcpy(&blob, pbBlob, sizeof(blob)); + + if (blob.header.magic != SYMCRYPT_BLOB_MAGIC || + blob.header.size != SYMCRYPT_KECCAK_STATE_EXPORT_SIZE || + blob.header.type != (UINT32)type) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + SymCryptMarvin32(SymCryptMarvin32DefaultSeed, (PCBYTE)&blob, sizeof(blob) - sizeof(SYMCRYPT_BLOB_TRAILER), checksum); + if (memcmp(checksum, &blob.trailer.checksum[0], 8) != 0) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + SymCryptLsbFirstToUint64(&blob.state[0], &pState->state[0], 25); + pState->stateIndex = blob.stateIndex; + pState->paddingValue = blob.paddingValue; + pState->squeezeMode = blob.squeezeMode; + + // + // Set state fields based on the blob type and do validation + // + + // default values indicate error + pState->inputBlockSize = 0; + pState->paddingValue = 0; + + switch (blob.header.type) + { + case SymCryptBlobTypeSha3_224State: + pState->inputBlockSize = 
SYMCRYPT_SHA3_224_INPUT_BLOCK_SIZE; + if (blob.paddingValue == SYMCRYPT_SHA3_PADDING_VALUE) + { + pState->paddingValue = blob.paddingValue; + } + break; + case SymCryptBlobTypeSha3_256State: + pState->inputBlockSize = SYMCRYPT_SHA3_256_INPUT_BLOCK_SIZE; + if (blob.paddingValue == SYMCRYPT_SHA3_PADDING_VALUE) + { + pState->paddingValue = blob.paddingValue; + } + break; + + case SymCryptBlobTypeSha3_384State: + pState->inputBlockSize = SYMCRYPT_SHA3_384_INPUT_BLOCK_SIZE; + if (blob.paddingValue == SYMCRYPT_SHA3_PADDING_VALUE) + { + pState->paddingValue = blob.paddingValue; + } + break; + + case SymCryptBlobTypeSha3_512State: + pState->inputBlockSize = SYMCRYPT_SHA3_512_INPUT_BLOCK_SIZE; + if (blob.paddingValue == SYMCRYPT_SHA3_PADDING_VALUE) + { + pState->paddingValue = blob.paddingValue; + } + break; + default: + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + if (pState->inputBlockSize == 0 || pState->paddingValue == 0) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + if (pState->stateIndex > pState->inputBlockSize) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + // Allow stateIndex = inputBlockSize only in squeeze mode + if ((pState->stateIndex == pState->inputBlockSize) && !pState->squeezeMode) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + +cleanup: + SymCryptWipeKnownSize(&blob, sizeof(blob)); + + return scError; +} diff --git a/libs/symcrypt/lib/sha3_224.c b/libs/symcrypt/lib/sha3_224.c new file mode 100644 index 00000000000..69dec754509 --- /dev/null +++ b/libs/symcrypt/lib/sha3_224.c @@ -0,0 +1,141 @@ +// +// Sha3_224.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. 
+// + + +const SYMCRYPT_HASH SymCryptSha3_224Algorithm_default = { + &SymCryptSha3_224Init, + &SymCryptSha3_224Append, + &SymCryptSha3_224Result, + NULL, // AppendBlocks function is not implemented for SHA-3 + &SymCryptSha3_224StateCopy, + sizeof(SYMCRYPT_SHA3_224_STATE), + SYMCRYPT_SHA3_224_RESULT_SIZE, + SYMCRYPT_SHA3_224_INPUT_BLOCK_SIZE, + SYMCRYPT_FIELD_OFFSET(SYMCRYPT_SHA3_224_STATE, ks.state), + SYMCRYPT_FIELD_SIZE(SYMCRYPT_SHA3_224_STATE, ks.state), +}; + +const PCSYMCRYPT_HASH SymCryptSha3_224Algorithm = &SymCryptSha3_224Algorithm_default; + + +// +// SymCryptSha3_224 +// +#define ALG SHA3_224 +#define Alg Sha3_224 +#include "hash_pattern.c" +#undef ALG +#undef Alg + + +// +// SymCryptSha3_224Init +// +VOID +SYMCRYPT_CALL +SymCryptSha3_224Init(_Out_ PSYMCRYPT_SHA3_224_STATE pState) +{ + SymCryptKeccakInit(&pState->ks, + SYMCRYPT_SHA3_224_INPUT_BLOCK_SIZE, + SYMCRYPT_SHA3_PADDING_VALUE); + + SYMCRYPT_SET_MAGIC(pState); +} + + +// +// SymCryptSha3_224Append +// +VOID +SYMCRYPT_CALL +SymCryptSha3_224Append( + _Inout_ PSYMCRYPT_SHA3_224_STATE pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData) +{ + SymCryptKeccakAppend(&pState->ks, pbData, cbData); +} + + +// +// SymCryptSha3_224Result +// +VOID +SYMCRYPT_CALL +SymCryptSha3_224Result( + _Inout_ PSYMCRYPT_SHA3_224_STATE pState, + _Out_writes_(SYMCRYPT_SHA3_224_RESULT_SIZE) PBYTE pbResult) +{ + SymCryptKeccakExtract(&pState->ks, pbResult, SYMCRYPT_SHA3_224_RESULT_SIZE, TRUE); +} + + +// +// SymCryptSha3_224StateExport +// +VOID +SYMCRYPT_CALL +SymCryptSha3_224StateExport( + _In_ PCSYMCRYPT_SHA3_224_STATE pState, + _Out_writes_bytes_(SYMCRYPT_SHA3_224_STATE_EXPORT_SIZE) PBYTE pbBlob) +{ + SYMCRYPT_CHECK_MAGIC(pState); + SymCryptKeccakStateExport(SymCryptBlobTypeSha3_224State, &pState->ks, pbBlob); +} + + +// +// SymCryptSha3_224StateImport +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha3_224StateImport( + _Out_ PSYMCRYPT_SHA3_224_STATE pState, + _In_reads_bytes_(SYMCRYPT_SHA3_224_STATE_EXPORT_SIZE) 
PCBYTE pbBlob) +{ + SYMCRYPT_ERROR scError = SymCryptKeccakStateImport(SymCryptBlobTypeSha3_224State, &pState->ks, pbBlob); + + if (scError == SYMCRYPT_NO_ERROR) + { + SYMCRYPT_SET_MAGIC(pState); + } + + return scError; +} + + +// +// Simple test vector for FIPS module testing +// + +static const BYTE sha3_224KATAnswer[28] = { + 0xe6, 0x42, 0x82, 0x4c, 0x3f, 0x8c, 0xf2, 0x4a, + 0xd0, 0x92, 0x34, 0xee, 0x7d, 0x3c, 0x76, 0x6f, + 0xc9, 0xa3, 0xa5, 0x16, 0x8d, 0x0c, 0x94, 0xad, + 0x73, 0xb4, 0x6f, 0xdf, +}; + +VOID +SYMCRYPT_CALL +SymCryptSha3_224Selftest(void) +{ + BYTE result[SYMCRYPT_SHA3_224_RESULT_SIZE]; + + SymCryptSha3_224(SymCryptTestMsg3, sizeof(SymCryptTestMsg3), result); + + SymCryptInjectError(result, sizeof(result)); + + if (memcmp(result, sha3_224KATAnswer, sizeof(result)) != 0) + { + SymCryptFatal('SHA3'); + } +} diff --git a/libs/symcrypt/lib/sha3_256.c b/libs/symcrypt/lib/sha3_256.c new file mode 100644 index 00000000000..aabe11b71a3 --- /dev/null +++ b/libs/symcrypt/lib/sha3_256.c @@ -0,0 +1,141 @@ +// +// Sha3_256.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. 
+// + + +const SYMCRYPT_HASH SymCryptSha3_256Algorithm_default = { + &SymCryptSha3_256Init, + &SymCryptSha3_256Append, + &SymCryptSha3_256Result, + NULL, // AppendBlocks function is not implemented for SHA-3 + &SymCryptSha3_256StateCopy, + sizeof(SYMCRYPT_SHA3_256_STATE), + SYMCRYPT_SHA3_256_RESULT_SIZE, + SYMCRYPT_SHA3_256_INPUT_BLOCK_SIZE, + SYMCRYPT_FIELD_OFFSET(SYMCRYPT_SHA3_256_STATE, ks.state), + SYMCRYPT_FIELD_SIZE(SYMCRYPT_SHA3_256_STATE, ks.state), +}; + +const PCSYMCRYPT_HASH SymCryptSha3_256Algorithm = &SymCryptSha3_256Algorithm_default; + + +// +// SymCryptSha3_256 +// +#define ALG SHA3_256 +#define Alg Sha3_256 +#include "hash_pattern.c" +#undef ALG +#undef Alg + + +// +// SymCryptSha3_256Init +// +VOID +SYMCRYPT_CALL +SymCryptSha3_256Init(_Out_ PSYMCRYPT_SHA3_256_STATE pState) +{ + SymCryptKeccakInit(&pState->ks, + SYMCRYPT_SHA3_256_INPUT_BLOCK_SIZE, + SYMCRYPT_SHA3_PADDING_VALUE); + + SYMCRYPT_SET_MAGIC(pState); +} + + +// +// SymCryptSha3_256Append +// +VOID +SYMCRYPT_CALL +SymCryptSha3_256Append( + _Inout_ PSYMCRYPT_SHA3_256_STATE pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData) +{ + SymCryptKeccakAppend(&pState->ks, pbData, cbData); +} + + +// +// SymCryptSha3_256Result +// +VOID +SYMCRYPT_CALL +SymCryptSha3_256Result( + _Inout_ PSYMCRYPT_SHA3_256_STATE pState, + _Out_writes_(SYMCRYPT_SHA3_256_RESULT_SIZE) PBYTE pbResult) +{ + SymCryptKeccakExtract(&pState->ks, pbResult, SYMCRYPT_SHA3_256_RESULT_SIZE, TRUE); +} + + +// +// SymCryptSha3_256StateExport +// +VOID +SYMCRYPT_CALL +SymCryptSha3_256StateExport( + _In_ PCSYMCRYPT_SHA3_256_STATE pState, + _Out_writes_bytes_(SYMCRYPT_SHA3_256_STATE_EXPORT_SIZE) PBYTE pbBlob) +{ + SYMCRYPT_CHECK_MAGIC(pState); + SymCryptKeccakStateExport(SymCryptBlobTypeSha3_256State, &pState->ks, pbBlob); +} + + +// +// SymCryptSha3_256StateImport +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha3_256StateImport( + _Out_ PSYMCRYPT_SHA3_256_STATE pState, + _In_reads_bytes_(SYMCRYPT_SHA3_256_STATE_EXPORT_SIZE) 
PCBYTE pbBlob) +{ + SYMCRYPT_ERROR scError = SymCryptKeccakStateImport(SymCryptBlobTypeSha3_256State, &pState->ks, pbBlob); + + if (scError == SYMCRYPT_NO_ERROR) + { + SYMCRYPT_SET_MAGIC(pState); + } + + return scError; +} + + +// +// Simple test vector for FIPS module testing +// + +static const BYTE sha3_256KATAnswer[32] = { + 0x3a, 0x98, 0x5d, 0xa7, 0x4f, 0xe2, 0x25, 0xb2, + 0x04, 0x5c, 0x17, 0x2d, 0x6b, 0xd3, 0x90, 0xbd, + 0x85, 0x5f, 0x08, 0x6e, 0x3e, 0x9d, 0x52, 0x5b, + 0x46, 0xbf, 0xe2, 0x45, 0x11, 0x43, 0x15, 0x32 +}; + +VOID +SYMCRYPT_CALL +SymCryptSha3_256Selftest(void) +{ + BYTE result[SYMCRYPT_SHA3_256_RESULT_SIZE]; + + SymCryptSha3_256(SymCryptTestMsg3, sizeof(SymCryptTestMsg3), result); + + SymCryptInjectError(result, sizeof(result)); + + if (memcmp(result, sha3_256KATAnswer, sizeof(result)) != 0) + { + SymCryptFatal('SHA3'); + } +} diff --git a/libs/symcrypt/lib/sha3_384.c b/libs/symcrypt/lib/sha3_384.c new file mode 100644 index 00000000000..0f557661ac2 --- /dev/null +++ b/libs/symcrypt/lib/sha3_384.c @@ -0,0 +1,143 @@ +// +// Sha3_384.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. 
+// + + +const SYMCRYPT_HASH SymCryptSha3_384Algorithm_default = { + &SymCryptSha3_384Init, + &SymCryptSha3_384Append, + &SymCryptSha3_384Result, + NULL, // AppendBlocks function is not implemented for SHA-3 + &SymCryptSha3_384StateCopy, + sizeof(SYMCRYPT_SHA3_384_STATE), + SYMCRYPT_SHA3_384_RESULT_SIZE, + SYMCRYPT_SHA3_384_INPUT_BLOCK_SIZE, + SYMCRYPT_FIELD_OFFSET(SYMCRYPT_SHA3_384_STATE, ks.state), + SYMCRYPT_FIELD_SIZE(SYMCRYPT_SHA3_384_STATE, ks.state), +}; + +const PCSYMCRYPT_HASH SymCryptSha3_384Algorithm = &SymCryptSha3_384Algorithm_default; + + +// +// SymCryptSha3_384 +// +#define ALG SHA3_384 +#define Alg Sha3_384 +#include "hash_pattern.c" +#undef ALG +#undef Alg + + +// +// SymCryptSha3_384Init +// +VOID +SYMCRYPT_CALL +SymCryptSha3_384Init(_Out_ PSYMCRYPT_SHA3_384_STATE pState) +{ + SymCryptKeccakInit(&pState->ks, + SYMCRYPT_SHA3_384_INPUT_BLOCK_SIZE, + SYMCRYPT_SHA3_PADDING_VALUE); + + SYMCRYPT_SET_MAGIC(pState); +} + + +// +// SymCryptSha3_384Append +// +VOID +SYMCRYPT_CALL +SymCryptSha3_384Append( + _Inout_ PSYMCRYPT_SHA3_384_STATE pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData) +{ + SymCryptKeccakAppend(&pState->ks, pbData, cbData); +} + + +// +// SymCryptSha3_384Result +// +VOID +SYMCRYPT_CALL +SymCryptSha3_384Result( + _Inout_ PSYMCRYPT_SHA3_384_STATE pState, + _Out_writes_(SYMCRYPT_SHA3_384_RESULT_SIZE) PBYTE pbResult) +{ + SymCryptKeccakExtract(&pState->ks, pbResult, SYMCRYPT_SHA3_384_RESULT_SIZE, TRUE); +} + + +// +// SymCryptSha3_384StateExport +// +VOID +SYMCRYPT_CALL +SymCryptSha3_384StateExport( + _In_ PCSYMCRYPT_SHA3_384_STATE pState, + _Out_writes_bytes_(SYMCRYPT_SHA3_384_STATE_EXPORT_SIZE) PBYTE pbBlob) +{ + SYMCRYPT_CHECK_MAGIC(pState); + SymCryptKeccakStateExport(SymCryptBlobTypeSha3_384State, &pState->ks, pbBlob); +} + + +// +// SymCryptSha3_384StateImport +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha3_384StateImport( + _Out_ PSYMCRYPT_SHA3_384_STATE pState, + _In_reads_bytes_(SYMCRYPT_SHA3_384_STATE_EXPORT_SIZE) 
PCBYTE pbBlob) +{ + SYMCRYPT_ERROR scError = SymCryptKeccakStateImport(SymCryptBlobTypeSha3_384State, &pState->ks, pbBlob); + + if (scError == SYMCRYPT_NO_ERROR) + { + SYMCRYPT_SET_MAGIC(pState); + } + + return scError; +} + + +// +// Simple test vector for FIPS module testing +// + +static const BYTE sha3_384KATAnswer[48] = { + 0xec, 0x01, 0x49, 0x82, 0x88, 0x51, 0x6f, 0xc9, + 0x26, 0x45, 0x9f, 0x58, 0xe2, 0xc6, 0xad, 0x8d, + 0xf9, 0xb4, 0x73, 0xcb, 0x0f, 0xc0, 0x8c, 0x25, + 0x96, 0xda, 0x7c, 0xf0, 0xe4, 0x9b, 0xe4, 0xb2, + 0x98, 0xd8, 0x8c, 0xea, 0x92, 0x7a, 0xc7, 0xf5, + 0x39, 0xf1, 0xed, 0xf2, 0x28, 0x37, 0x6d, 0x25 +}; + +VOID +SYMCRYPT_CALL +SymCryptSha3_384Selftest(void) +{ + BYTE result[SYMCRYPT_SHA3_384_RESULT_SIZE]; + + SymCryptSha3_384(SymCryptTestMsg3, sizeof(SymCryptTestMsg3), result); + + SymCryptInjectError(result, sizeof(result)); + + if (memcmp(result, sha3_384KATAnswer, sizeof(result)) != 0) + { + SymCryptFatal('SHA3'); + } +} diff --git a/libs/symcrypt/lib/sha3_512.c b/libs/symcrypt/lib/sha3_512.c new file mode 100644 index 00000000000..71eccc71a64 --- /dev/null +++ b/libs/symcrypt/lib/sha3_512.c @@ -0,0 +1,144 @@ +// +// Sha3_512.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. 
+// + + +const SYMCRYPT_HASH SymCryptSha3_512Algorithm_default = { + &SymCryptSha3_512Init, + &SymCryptSha3_512Append, + &SymCryptSha3_512Result, + NULL, // AppendBlocks function is not implemented for SHA-3 + &SymCryptSha3_512StateCopy, + sizeof(SYMCRYPT_SHA3_512_STATE), + SYMCRYPT_SHA3_512_RESULT_SIZE, + SYMCRYPT_SHA3_512_INPUT_BLOCK_SIZE, + SYMCRYPT_FIELD_OFFSET(SYMCRYPT_SHA3_512_STATE, ks.state), + SYMCRYPT_FIELD_SIZE(SYMCRYPT_SHA3_512_STATE, ks.state), +}; + +const PCSYMCRYPT_HASH SymCryptSha3_512Algorithm = &SymCryptSha3_512Algorithm_default; + + +// +// SymCryptSha3_512 +// +#define ALG SHA3_512 +#define Alg Sha3_512 +#include "hash_pattern.c" +#undef ALG +#undef Alg + + +// +// SymCryptSha3_512Init +// +VOID +SYMCRYPT_CALL +SymCryptSha3_512Init(_Out_ PSYMCRYPT_SHA3_512_STATE pState) +{ + SymCryptKeccakInit(&pState->ks, + SYMCRYPT_SHA3_512_INPUT_BLOCK_SIZE, + SYMCRYPT_SHA3_PADDING_VALUE); + + SYMCRYPT_SET_MAGIC(pState); +} + + +// +// SymCryptSha3_512Append +// +VOID +SYMCRYPT_CALL +SymCryptSha3_512Append( + _Inout_ PSYMCRYPT_SHA3_512_STATE pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData) +{ + SymCryptKeccakAppend(&pState->ks, pbData, cbData); +} + + +// +// SymCryptSha3_512Result +// +VOID +SYMCRYPT_CALL +SymCryptSha3_512Result( + _Inout_ PSYMCRYPT_SHA3_512_STATE pState, + _Out_writes_(SYMCRYPT_SHA3_512_RESULT_SIZE) PBYTE pbResult) +{ + SymCryptKeccakExtract(&pState->ks, pbResult, SYMCRYPT_SHA3_512_RESULT_SIZE, TRUE); +} + + +// +// SymCryptSha3_512StateExport +// +VOID +SYMCRYPT_CALL +SymCryptSha3_512StateExport( + _In_ PCSYMCRYPT_SHA3_512_STATE pState, + _Out_writes_bytes_(SYMCRYPT_SHA3_512_STATE_EXPORT_SIZE) PBYTE pbBlob) +{ + SYMCRYPT_CHECK_MAGIC(pState); + SymCryptKeccakStateExport(SymCryptBlobTypeSha3_512State, &pState->ks, pbBlob); +} + +// +// SymCryptSha3_512StateExport +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptSha3_512StateImport( + _Out_ PSYMCRYPT_SHA3_512_STATE pState, + _In_reads_bytes_(SYMCRYPT_SHA3_512_STATE_EXPORT_SIZE) PCBYTE 
pbBlob) +{ + SYMCRYPT_ERROR scError = SymCryptKeccakStateImport(SymCryptBlobTypeSha3_512State, &pState->ks, pbBlob); + + if (scError == SYMCRYPT_NO_ERROR) + { + SYMCRYPT_SET_MAGIC(pState); + } + + return scError; +} + + +// +// Simple test vector for FIPS module testing +// + +static const BYTE sha3_512KATAnswer[64] = { + 0xb7, 0x51, 0x85, 0x0b, 0x1a, 0x57, 0x16, 0x8a, + 0x56, 0x93, 0xcd, 0x92, 0x4b, 0x6b, 0x09, 0x6e, + 0x08, 0xf6, 0x21, 0x82, 0x74, 0x44, 0xf7, 0x0d, + 0x88, 0x4f, 0x5d, 0x02, 0x40, 0xd2, 0x71, 0x2e, + 0x10, 0xe1, 0x16, 0xe9, 0x19, 0x2a, 0xf3, 0xc9, + 0x1a, 0x7e, 0xc5, 0x76, 0x47, 0xe3, 0x93, 0x40, + 0x57, 0x34, 0x0b, 0x4c, 0xf4, 0x08, 0xd5, 0xa5, + 0x65, 0x92, 0xf8, 0x27, 0x4e, 0xec, 0x53, 0xf0 +}; + +VOID +SYMCRYPT_CALL +SymCryptSha3_512Selftest(void) +{ + BYTE result[SYMCRYPT_SHA3_512_RESULT_SIZE]; + + SymCryptSha3_512(SymCryptTestMsg3, sizeof(SymCryptTestMsg3), result); + + SymCryptInjectError(result, sizeof(result)); + + if (memcmp(result, sha3_512KATAnswer, sizeof(result)) != 0) + { + SymCryptFatal('SHA3'); + } +} diff --git a/libs/symcrypt/lib/sha512-ymm.c b/libs/symcrypt/lib/sha512-ymm.c new file mode 100644 index 00000000000..de7f87fd3a5 --- /dev/null +++ b/libs/symcrypt/lib/sha512-ymm.c @@ -0,0 +1,801 @@ +#include "precomp.h" + +#if SYMCRYPT_CPU_AMD64 + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("avx2") +#endif + +extern SYMCRYPT_ALIGN_AT(64) const UINT64 SymCryptSha512K[81]; + + +// Endianness transformation for 4 64-bit values in a YMM register +const SYMCRYPT_ALIGN_AT(32) UINT64 BYTE_REVERSE_64X2[4] = { + 0x0001020304050607, 0x08090a0b0c0d0e0f, + 0x0001020304050607, 0x08090a0b0c0d0e0f +}; + +// Rotate right each 64-bit value in a YMM register by 1 byte +const SYMCRYPT_ALIGN_AT(32) UINT64 BYTE_ROTATE_64[4] = { + 0x0007060504030201, 0x080f0e0d0c0b0a09, + 0x0007060504030201, 0x080f0e0d0c0b0a09, +}; + + +#if SYMCRYPT_MS_VC && 
!defined(__clang__) +#define RORX_U32 _rorx_u32 +#define RORX_U64 _rorx_u64 +#else +// TODO: implement _rorx functions for clang +#define RORX_U32 ROR32 +#define RORX_U64 ROR64 +#endif // SYMCRYPT_MS_VC + + +// +// For documentation on these functions see FIPS 180-2 +// +// MAJ and CH are the functions Maj and Ch from the standard. +// CSIGMA0 and CSIGMA1 are the capital sigma functions. +// LSIGMA0 and LSIGMA1 are the lowercase sigma functions. +// +// The canonical definitions of the MAJ and CH functions are: +//#define MAJ( x, y, z ) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +//#define CH( x, y, z ) (((x) & (y)) ^ ((~(x)) & (z))) +// We use optimized versions defined below +// + +#define MAJ( x, y, z ) ((((z) | (y)) & (x) ) | ((z) & (y))) +#define CH( x, y, z ) ((((z) ^ (y)) & (x)) ^ (z)) + +#define CSIGMA0(x) (RORX_U64(x, 28) ^ RORX_U64(x, 34) ^ RORX_U64(x, 39)) +#define CSIGMA1(x) (RORX_U64(x, 14) ^ RORX_U64(x, 18) ^ RORX_U64(x, 41)) + +#define LSIGMA0( x ) (ROR64( (x) ^ ROR64((x), 7), 1) ^ ((x)>> 7)) +#define LSIGMA1( x ) (ROR64( (x) ^ ROR64((x), 42), 19) ^ ((x)>> 6)) + +#define YMMADD( _a, _b ) _mm256_add_epi64((_a), (_b)) +#define YMMROR( _a, _n ) _mm256_xor_si256( _mm256_slli_epi64( (_a), 64-(_n)), _mm256_srli_epi64( (_a), (_n)) ) +#define YMMSHR( _a, _n ) _mm256_srli_epi64((_a), (_n)) +#define YMMXOR( _a, _b ) _mm256_xor_si256((_a), (_b)) + +// Rotation by 8 bits is faster with byte shuffling +#if 1 +#define YMMROR8( _a ) _mm256_shuffle_epi8((_a), _mm256_load_si256((__m256i*)BYTE_ROTATE_64)) +#define YMMLSIGMA0( x ) YMMXOR( YMMXOR( YMMROR((x), 1), YMMROR8((x))), YMMSHR((x), 7)) +#else +#define YMMLSIGMA0( x ) YMMXOR( YMMXOR( YMMROR((x), 1), YMMROR((x), 8)), YMMSHR((x), 7)) +#endif + +#define YMMLSIGMA1( x ) YMMXOR( YMMXOR( YMMROR((x), 19), YMMROR((x), 61)), YMMSHR((x), 6)) + + +// +// YMM implementation that processes 1 message block at a time +// + +// Core round function +// Message words are loaded from Wx.ul[80].
+#define CROUND_1BLOCK(a, b, c, d, e, f, g, h, r) {;\ + UINT64 T1 = CSIGMA1(e) + CH(e, f, g) + Wx.ul[r] + SymCryptSha512K[r]; \ + UINT64 T2 = CSIGMA0(a) + MAJ(a, b, c); \ + h += T1; \ + d += h;\ + h += T2;\ +} + +// +// Message expansion for 4 rounds +// +// Each element of Wx.ymm[] array contains 4 message words, with the +// first 4 elements containing the original 16 message words. These are +// then expanded 16 times to generate the next 16 * 4 message words, +// comprising the 80 expanded message words in the union arrays Wx.ymm[20] or Wx.ul[80]. +// +// rnd: starts from 16 (updating the 4th element of the Wx.ymm[] array) and +// goes up to 76 in multiples of 4. +// +#define SHA512_MSG_EXPAND_1BLOCK_4ROUNDS(rnd) { \ + Wx.ymm[(rnd) / 4] = _mm256_add_epi64(_mm256_add_epi64( \ + YMMLSIGMA0(_mm256_loadu_si256((__m256i*)& Wx.ul[(rnd) - 15])), \ + _mm256_load_si256((__m256i*)& Wx.ul[(rnd) - 16])), \ + _mm256_loadu_si256((__m256i*)& Wx.ul[(rnd) - 7])); \ + Wx.ul[(rnd) + 0] += LSIGMA1(Wx.ul[(rnd) - 2]); \ + Wx.ul[(rnd) + 1] += LSIGMA1(Wx.ul[(rnd) - 1]); \ + Wx.ul[(rnd) + 2] += LSIGMA1(Wx.ul[(rnd) + 0]); \ + Wx.ul[(rnd) + 3] += LSIGMA1(Wx.ul[(rnd) + 1]); \ +} + + +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_ymm_1block( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE* pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T* pcbRemaining) +{ + SYMCRYPT_ALIGN_AT(32) union { UINT64 ul[80]; __m256i ymm[20]; } Wx; + UINT64 A, B, C, D, E, F, G, H; + + _mm256_zeroupper(); + + while (cbData >= SYMCRYPT_SHA512_INPUT_BLOCK_SIZE) + { + A = pChain->H[0]; + B = pChain->H[1]; + C = pChain->H[2]; + D = pChain->H[3]; + E = pChain->H[4]; + F = pChain->H[5]; + G = pChain->H[6]; + H = pChain->H[7]; + +#if 0 + Wx.ul[ 0] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 0]); + Wx.ul[ 1] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 1]); + Wx.ul[ 2] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 2]); + Wx.ul[ 3] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 3]); + Wx.ul[ 4] = 
SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 4]); + Wx.ul[ 5] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 5]); + Wx.ul[ 6] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 6]); + Wx.ul[ 7] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 7]); + Wx.ul[ 8] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 8]); + Wx.ul[ 9] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 9]); + Wx.ul[10] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 10]); + Wx.ul[11] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 11]); + Wx.ul[12] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 12]); + Wx.ul[13] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 13]); + Wx.ul[14] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 14]); + Wx.ul[15] = SYMCRYPT_LOAD_MSBFIRST64(&pbData[8 * 15]); +#else + Wx.ymm[0] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[0 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (0) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + Wx.ymm[1] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[0 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (1) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + Wx.ymm[2] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[0 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (2) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + Wx.ymm[3] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[0 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (3) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); +#endif + + for (int iterCount=0; iterCount<(64/16); iterCount++) + { + const int roundBase = iterCount*16; + + CROUND_1BLOCK(A, B, C, D, E, F, G, H, roundBase + 0); + CROUND_1BLOCK(H, A, B, C, D, E, F, G, roundBase + 1); + CROUND_1BLOCK(G, H, A, B, C, D, E, F, roundBase + 2); + CROUND_1BLOCK(F, G, H, A, B, C, D, E, roundBase + 3); + SHA512_MSG_EXPAND_1BLOCK_4ROUNDS(roundBase + 16); + + CROUND_1BLOCK(E, F, G, H, A, B, C, D, roundBase + 4); + CROUND_1BLOCK(D, E, F, G, H, A, B, C, roundBase + 5); + CROUND_1BLOCK(C, D, E, F, G, H, A, B, roundBase + 6); + CROUND_1BLOCK(B, C, D, E, F, G, H, A, roundBase + 7); + 
SHA512_MSG_EXPAND_1BLOCK_4ROUNDS(roundBase + 20); + + CROUND_1BLOCK(A, B, C, D, E, F, G, H, roundBase + 8); + CROUND_1BLOCK(H, A, B, C, D, E, F, G, roundBase + 9); + CROUND_1BLOCK(G, H, A, B, C, D, E, F, roundBase + 10); + CROUND_1BLOCK(F, G, H, A, B, C, D, E, roundBase + 11); + SHA512_MSG_EXPAND_1BLOCK_4ROUNDS(roundBase + 24); + + CROUND_1BLOCK(E, F, G, H, A, B, C, D, roundBase + 12); + CROUND_1BLOCK(D, E, F, G, H, A, B, C, roundBase + 13); + CROUND_1BLOCK(C, D, E, F, G, H, A, B, roundBase + 14); + CROUND_1BLOCK(B, C, D, E, F, G, H, A, roundBase + 15); + SHA512_MSG_EXPAND_1BLOCK_4ROUNDS(roundBase + 28); + } + + CROUND_1BLOCK(A, B, C, D, E, F, G, H, 64 + 0); + CROUND_1BLOCK(H, A, B, C, D, E, F, G, 64 + 1); + CROUND_1BLOCK(G, H, A, B, C, D, E, F, 64 + 2); + CROUND_1BLOCK(F, G, H, A, B, C, D, E, 64 + 3); + CROUND_1BLOCK(E, F, G, H, A, B, C, D, 64 + 4); + CROUND_1BLOCK(D, E, F, G, H, A, B, C, 64 + 5); + CROUND_1BLOCK(C, D, E, F, G, H, A, B, 64 + 6); + CROUND_1BLOCK(B, C, D, E, F, G, H, A, 64 + 7); + CROUND_1BLOCK(A, B, C, D, E, F, G, H, 64 + 8); + CROUND_1BLOCK(H, A, B, C, D, E, F, G, 64 + 9); + CROUND_1BLOCK(G, H, A, B, C, D, E, F, 64 + 10); + CROUND_1BLOCK(F, G, H, A, B, C, D, E, 64 + 11); + CROUND_1BLOCK(E, F, G, H, A, B, C, D, 64 + 12); + CROUND_1BLOCK(D, E, F, G, H, A, B, C, 64 + 13); + CROUND_1BLOCK(C, D, E, F, G, H, A, B, 64 + 14); + CROUND_1BLOCK(B, C, D, E, F, G, H, A, 64 + 15); + + pChain->H[0] = A + pChain->H[0]; + pChain->H[1] = B + pChain->H[1]; + pChain->H[2] = C + pChain->H[2]; + pChain->H[3] = D + pChain->H[3]; + pChain->H[4] = E + pChain->H[4]; + pChain->H[5] = F + pChain->H[5]; + pChain->H[6] = G + pChain->H[6]; + pChain->H[7] = H + pChain->H[7]; + + pbData += SYMCRYPT_SHA512_INPUT_BLOCK_SIZE; + cbData -= SYMCRYPT_SHA512_INPUT_BLOCK_SIZE; + } + + *pcbRemaining = cbData; + + _mm256_zeroupper(); + + // + // Wipe the variables; + // + SymCryptWipeKnownSize(Wx.ymm, sizeof(Wx.ymm)); + } + + + + + // + // 2-way parallel message block processing + // + + // 
Core round function + // + // r : round number ( 0 <= r < 80) + // bl: message block index ( bl = 0, 1) + // + // The message words are generated by YMM code into the array Wx.ul[40][4]. + // Let W0, W1, ..., W15 be the message words from the first message block and + // Y0, Y1, ..., Y15 be the message words from the second message block. After message + // expansion, Wx.ul[][] array will take the following form: + // + // Wx.ul[40][4] = { + // W0, W1, Y0, Y1, + // W2, W3, Y2, Y3, + // ... + // W78, W79, Y78, Y79 + // }; + // +#define CROUND_2BLOCKS(a, b, c, d, e, f, g, h, r, bl ) { \ + UINT64 T1 = CSIGMA1(e) + CH(e, f, g) + Wx.ul[(r) / 2][2 * (bl) + ((r) & 1)] + SymCryptSha512K[r]; \ + UINT64 T2 = CSIGMA0(a) + MAJ(a, b, c); \ + h += T1; \ + d += h;\ + h += T2;\ +} + +// Message expansion of 2 message blocks for 2 rounds +#define SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS(ind) { \ + __m256i t1 = _mm256_permute4x64_epi64(_mm256_blend_epi32(Wx.ymm[ind + 0], Wx.ymm[ind + 1], 0x33), 0xb1); \ + __m256i t2 = _mm256_permute4x64_epi64(_mm256_blend_epi32(Wx.ymm[ind + 4], Wx.ymm[ind + 5], 0x33), 0xb1); \ + __m256i s = _mm256_add_epi64(Wx.ymm[ind], _mm256_add_epi64(YMMLSIGMA0(t1), _mm256_add_epi64(YMMLSIGMA1(Wx.ymm[ind + 7]), t2))); \ + _mm256_store_si256(&Wx.ymm[ind + 8], s); \ +} + +// +// 16 rounds of 2-block message expansion with 16 rounds of message processing of the first message block +// +// This macro is called four times to generate 64 expanded message words and do 64 rounds of processing of +// the first message block. The indices substituted for SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS() (resp. CROUND_2BLOCKS()) +// range from 0 to 31 (resp. 0 to 63).
+#define SHA512_2BLOCKS_ROUND_STITCHED_16X(rb, ind) { \ + SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS(rb + ind + 0); \ + CROUND_2BLOCKS(A, B, C, D, E, F, G, H, 2 * (rb + ind) + 0, 0); \ + CROUND_2BLOCKS(H, A, B, C, D, E, F, G, 2 * (rb + ind) + 1, 0); \ + SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS(rb + ind + 1); \ + CROUND_2BLOCKS(G, H, A, B, C, D, E, F, 2 * (rb + ind) + 2, 0); \ + CROUND_2BLOCKS(F, G, H, A, B, C, D, E, 2 * (rb + ind) + 3, 0); \ + SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS(rb + ind + 2); \ + CROUND_2BLOCKS(E, F, G, H, A, B, C, D, 2 * (rb + ind) + 4, 0); \ + CROUND_2BLOCKS(D, E, F, G, H, A, B, C, 2 * (rb + ind) + 5, 0); \ + SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS(rb + ind + 3); \ + CROUND_2BLOCKS(C, D, E, F, G, H, A, B, 2 * (rb + ind) + 6, 0); \ + CROUND_2BLOCKS(B, C, D, E, F, G, H, A, 2 * (rb + ind) + 7, 0); \ + SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS(rb + ind + 4); \ + CROUND_2BLOCKS(A, B, C, D, E, F, G, H, 2 * (rb + ind) + 8, 0); \ + CROUND_2BLOCKS(H, A, B, C, D, E, F, G, 2 * (rb + ind) + 9, 0); \ + SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS(rb + ind + 5); \ + CROUND_2BLOCKS(G, H, A, B, C, D, E, F, 2 * (rb + ind) + 10, 0); \ + CROUND_2BLOCKS(F, G, H, A, B, C, D, E, 2 * (rb + ind) + 11, 0); \ + SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS(rb + ind + 6); \ + CROUND_2BLOCKS(E, F, G, H, A, B, C, D, 2 * (rb + ind) + 12, 0); \ + CROUND_2BLOCKS(D, E, F, G, H, A, B, C, 2 * (rb + ind) + 13, 0); \ + SHA512_MSG_EXPAND_2BLOCKS_2ROUNDS(rb + ind + 7); \ + CROUND_2BLOCKS(C, D, E, F, G, H, A, B, 2 * (rb + ind) + 14, 0); \ + CROUND_2BLOCKS(B, C, D, E, F, G, H, A, 2 * (rb + ind) + 15, 0); \ +} + + +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_ymm_2blocks( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE* pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T* pcbRemaining) +{ + SYMCRYPT_ALIGN_AT(32) union { UINT64 ul[40][4]; __m256i ymm[40]; } Wx; + __m256i w1[4], w2[4]; + UINT64 A, B, C, D, E, F, G, H; + SIZE_T numBlocks; + + _mm256_zeroupper(); + + while (cbData >= SYMCRYPT_SHA512_INPUT_BLOCK_SIZE) + { + 
// Load message words from first block + // + // w1[0] = W3 W2 W1 W0 + // w1[1] = W7 W6 W5 W4 + // w1[2] = W11 W10 W9 W8 + // w1[3] = W15 W14 W13 W12 + // + numBlocks = 1; + w1[0] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[0 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (0) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + w1[1] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[0 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (1) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + w1[2] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[0 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (2) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + w1[3] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[0 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (3) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + + if (cbData >= (2 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE)) + { + // Load message words from second block + // + // w2[0] = Y3 Y2 Y1 Y0 + // w2[1] = Y7 Y6 Y5 Y4 + // w2[2] = Y11 Y10 Y9 Y8 + // w2[3] = Y15 Y14 Y13 Y12 + // + numBlocks = 2; + w2[0] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[1 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (0) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + w2[1] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[1 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (1) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + w2[2] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[1 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (2) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + w2[3] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) & pbData[1 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + (3) * 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); + } + + // process first block and do the message expansion for two blocks at the same time + { + A = pChain->H[0]; + B = pChain->H[1]; + C = pChain->H[2]; + D = pChain->H[3]; + E = pChain->H[4]; + F = pChain->H[5]; + G = pChain->H[6]; + 
H = pChain->H[7]; + + // + // Combine message words from two blocks + // + // Wx.ymm[0] = Y1 Y0 W1 W0 + // ... ... + // Wx.ymm[7] = Y15 Y14 W15 W14 + // + Wx.ymm[0] = _mm256_permute2x128_si256(w1[0], w2[0], 0x20); + Wx.ymm[1] = _mm256_permute2x128_si256(w1[0], w2[0], 0x31); + Wx.ymm[2] = _mm256_permute2x128_si256(w1[1], w2[1], 0x20); + Wx.ymm[3] = _mm256_permute2x128_si256(w1[1], w2[1], 0x31); + Wx.ymm[4] = _mm256_permute2x128_si256(w1[2], w2[2], 0x20); + Wx.ymm[5] = _mm256_permute2x128_si256(w1[2], w2[2], 0x31); + Wx.ymm[6] = _mm256_permute2x128_si256(w1[3], w2[3], 0x20); + Wx.ymm[7] = _mm256_permute2x128_si256(w1[3], w2[3], 0x31); + + // Do the message expansion of two message blocks together with the + // processing of first 64 rounds of first message block + SHA512_2BLOCKS_ROUND_STITCHED_16X(0, 0); + SHA512_2BLOCKS_ROUND_STITCHED_16X(0, 8); + SHA512_2BLOCKS_ROUND_STITCHED_16X(16, 0); + SHA512_2BLOCKS_ROUND_STITCHED_16X(16, 8); + + // + // Last 16 rounds of round processing + // + CROUND_2BLOCKS(A, B, C, D, E, F, G, H, 64 + 0, 0); + CROUND_2BLOCKS(H, A, B, C, D, E, F, G, 64 + 1, 0); + CROUND_2BLOCKS(G, H, A, B, C, D, E, F, 64 + 2, 0); + CROUND_2BLOCKS(F, G, H, A, B, C, D, E, 64 + 3, 0); + CROUND_2BLOCKS(E, F, G, H, A, B, C, D, 64 + 4, 0); + CROUND_2BLOCKS(D, E, F, G, H, A, B, C, 64 + 5, 0); + CROUND_2BLOCKS(C, D, E, F, G, H, A, B, 64 + 6, 0); + CROUND_2BLOCKS(B, C, D, E, F, G, H, A, 64 + 7, 0); + + CROUND_2BLOCKS(A, B, C, D, E, F, G, H, 72 + 0, 0); + CROUND_2BLOCKS(H, A, B, C, D, E, F, G, 72 + 1, 0); + CROUND_2BLOCKS(G, H, A, B, C, D, E, F, 72 + 2, 0); + CROUND_2BLOCKS(F, G, H, A, B, C, D, E, 72 + 3, 0); + CROUND_2BLOCKS(E, F, G, H, A, B, C, D, 72 + 4, 0); + CROUND_2BLOCKS(D, E, F, G, H, A, B, C, 72 + 5, 0); + CROUND_2BLOCKS(C, D, E, F, G, H, A, B, 72 + 6, 0); + CROUND_2BLOCKS(B, C, D, E, F, G, H, A, 72 + 7, 0); + + pChain->H[0] = A + pChain->H[0]; + pChain->H[1] = B + pChain->H[1]; + pChain->H[2] = C + pChain->H[2]; + pChain->H[3] = D + pChain->H[3]; + 
pChain->H[4] = E + pChain->H[4]; + pChain->H[5] = F + pChain->H[5]; + pChain->H[6] = G + pChain->H[6]; + pChain->H[7] = H + pChain->H[7]; + } + + // second block + if(numBlocks > 1) + { + A = pChain->H[0]; + B = pChain->H[1]; + C = pChain->H[2]; + D = pChain->H[3]; + E = pChain->H[4]; + F = pChain->H[5]; + G = pChain->H[6]; + H = pChain->H[7]; + + for (int iterCount=0; iterCount<(80/8); iterCount++) + { + const int roundBase = iterCount*8; + CROUND_2BLOCKS(A, B, C, D, E, F, G, H, roundBase + 0, 1); + CROUND_2BLOCKS(H, A, B, C, D, E, F, G, roundBase + 1, 1); + CROUND_2BLOCKS(G, H, A, B, C, D, E, F, roundBase + 2, 1); + CROUND_2BLOCKS(F, G, H, A, B, C, D, E, roundBase + 3, 1); + CROUND_2BLOCKS(E, F, G, H, A, B, C, D, roundBase + 4, 1); + CROUND_2BLOCKS(D, E, F, G, H, A, B, C, roundBase + 5, 1); + CROUND_2BLOCKS(C, D, E, F, G, H, A, B, roundBase + 6, 1); + CROUND_2BLOCKS(B, C, D, E, F, G, H, A, roundBase + 7, 1); + } + + pChain->H[0] = A + pChain->H[0]; + pChain->H[1] = B + pChain->H[1]; + pChain->H[2] = C + pChain->H[2]; + pChain->H[3] = D + pChain->H[3]; + pChain->H[4] = E + pChain->H[4]; + pChain->H[5] = F + pChain->H[5]; + pChain->H[6] = G + pChain->H[6]; + pChain->H[7] = H + pChain->H[7]; + } + + pbData += (numBlocks * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE); + cbData -= (numBlocks * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE); + } + + *pcbRemaining = cbData; + + _mm256_zeroupper(); + + // + // Wipe the variables; + // + SymCryptWipeKnownSize(Wx.ymm, sizeof(Wx.ymm)); + SymCryptWipeKnownSize(w1, sizeof(w1)); + SymCryptWipeKnownSize(w2, sizeof(w2)); +} + + + +// +// 4-way parallel message block processing +// + + +// Initial loading of message words and endianness transformation. +// +// bl : Number of message blocks to load, 1 <= bl <= 4. +// +// When bl < 4, the high order lanes of the YMM registers corresponding to the missing blocks are unused.
+// +#define SHA512_MSG_LOAD_4BLOCKS(bl) { \ + for(int i = 0; i < bl; i++) \ + { \ + Wx.ymm[i + 0] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) &pbData[i * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + 0]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); \ + Wx.ymm[i + 4] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) &pbData[i * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + 32]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); \ + Wx.ymm[i + 8] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) &pbData[i * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + 64]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); \ + Wx.ymm[i + 12] = _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*) &pbData[i * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE + 96]), _mm256_load_si256((__m256i*)BYTE_REVERSE_64X2)); \ + } \ +} + +// Shuffles the initially loaded message words from multiple blocks +// so that each YMM register contains message words with the same index +// within a block (e.g. Wx.ymm[0] contains the first words of each block). +// +// We have to use this macro four times to transform message blocks of 128-bytes. +// ind=0 processes the first quarter (32-bytes), ind=1 does the second quarter and so on. +// +#define SHA512_MSG_TRANSPOSE_QUARTER_4BLOCKS(ind) { \ + __m256i t1, t2, t3, t4; \ + t1 = _mm256_unpacklo_epi64(Wx.ymm[4 * (ind) + 0], Wx.ymm[4 * (ind) + 1]); \ + t2 = _mm256_unpacklo_epi64(Wx.ymm[4 * (ind) + 2], Wx.ymm[4 * (ind) + 3]); \ + t3 = _mm256_unpackhi_epi64(Wx.ymm[4 * (ind) + 0], Wx.ymm[4 * (ind) + 1]); \ + t4 = _mm256_unpackhi_epi64(Wx.ymm[4 * (ind) + 2], Wx.ymm[4 * (ind) + 3]); \ + Wx.ymm[4 * (ind) + 0] = _mm256_permute2x128_si256(t1, t2, 0x20); \ + Wx.ymm[4 * (ind) + 1] = _mm256_permute2x128_si256(t3, t4, 0x20); \ + Wx.ymm[4 * (ind) + 2] = _mm256_permute2x128_si256(t1, t2, 0x31); \ + Wx.ymm[4 * (ind) + 3] = _mm256_permute2x128_si256(t3, t4, 0x31); \ +} + +// Transpose all message words. Each SHA512_MSG_TRANSPOSE_QUARTER_4BLOCKS() does the +// transposition for four message words (i.e.
0 1 2 3, 4 5 6 7, 8 9 10 11, 12 13 14 15) +#define SHA512_MSG_TRANSPOSE_4BLOCKS() { \ + SHA512_MSG_TRANSPOSE_QUARTER_4BLOCKS(0); \ + SHA512_MSG_TRANSPOSE_QUARTER_4BLOCKS(1); \ + SHA512_MSG_TRANSPOSE_QUARTER_4BLOCKS(2); \ + SHA512_MSG_TRANSPOSE_QUARTER_4BLOCKS(3); \ +} + +// One round message schedule, updates the rth message word, and adds the constants to message words for (r-16). +#define SHA512_MSG_EXPAND_4BLOCKS_1ROUND(r) { \ + Wx.ymm[r] = _mm256_add_epi64(_mm256_add_epi64(_mm256_add_epi64(Wx.ymm[r - 16], Wx.ymm[r - 7]), \ + YMMLSIGMA0(Wx.ymm[r - 15])), YMMLSIGMA1(Wx.ymm[r - 2])); \ + Wx.ymm[r - 16] = _mm256_add_epi64(Wx.ymm[r - 16], _mm256_set1_epi64x(SymCryptSha512K[r - 16])); \ +} + +// Four rounds of message schedule. Generates message words for rounds r, r+1, r+2, r+3. +#define SHA512_MSG_EXPAND_4BLOCKS_4ROUNDS(r) { \ + SHA512_MSG_EXPAND_4BLOCKS_1ROUND((r) + 0); SHA512_MSG_EXPAND_4BLOCKS_1ROUND((r) + 1); \ + SHA512_MSG_EXPAND_4BLOCKS_1ROUND((r) + 2); SHA512_MSG_EXPAND_4BLOCKS_1ROUND((r) + 3); \ +} + +// Sixteen rounds of message schedule. Generates message words for rounds r, ..., r+15. +#define SHA512_MSG_EXPAND_4BLOCKS_16ROUNDS(r) { \ + SHA512_MSG_EXPAND_4BLOCKS_4ROUNDS((r) + 0); SHA512_MSG_EXPAND_4BLOCKS_4ROUNDS((r) + 4); \ + SHA512_MSG_EXPAND_4BLOCKS_4ROUNDS((r) + 8); SHA512_MSG_EXPAND_4BLOCKS_4ROUNDS((r) + 12); \ +} + +// +// Core round for 4-way message expansion without constant addition +// +// r: round number (0 <= r < 80) +// +// bl: message block index (0 <= bl < 4) +// +// Message words for four blocks are stored in Wx.ul4[80][4] in interleaved form: +// W0 X0 Y0 Z0 +// W1 X1 Y1 Z1 +// ...
+// W79 X79 Y79 Z79 +// +#define CROUND_4BLOCKS(a, b, c, d, e, f, g, h, r, bl ) { \ + UINT64 T1 = CSIGMA1(e) + CH(e, f, g) + Wx.ul4[r][bl]; \ + UINT64 T2 = CSIGMA0(a) + MAJ(a, b, c); \ + h += T1; \ + d += h; \ + h += T2; \ +} + +// Core round for single block +#define CROUND(a, b, c, d, e, f, g, h, r, r16) { \ + Wx.ul[r16] = Wt; \ + UINT64 T1 = CSIGMA1(e) + CH(e, f, g) + Wt + SymCryptSha512K[r]; \ + UINT64 T2 = CSIGMA0(a) + MAJ(a, b, c); \ + h += T1; \ + d += h;\ + h += T2;\ +} + +// Initial round for single block +#define IROUND( a, b, c, d, e, f, g, h, r ) { \ + Wt = SYMCRYPT_LOAD_MSBFIRST64( &pbData[ 8*r ] );\ + CROUND( a, b, c, d, e, f, g, h, r, r);\ +} + +// Full round for single block +#define FROUND( a, b, c, d, e, f, g, h, r, r16 ) { \ + Wt = LSIGMA1( Wx.ul[(r16-2) & 15] ) + Wx.ul[(r16-7) & 15] + \ + LSIGMA0( Wx.ul[(r16-15) & 15]) + Wx.ul[r16 & 15]; \ + CROUND( a, b, c, d, e, f, g, h, r, r16 ); \ +} + +// Constant addition and round processing for 8 rounds. Constants for rounds r < 64 are added during message expansion. +// This macro is called twice to add the constants and do the round processing for the last 16 rounds.
+#define SHA512_4BLOCKS_FINAL_ROUNDS_8X(rnd) { \ + Wx.ymm[rnd + 0] = _mm256_add_epi64(Wx.ymm[rnd + 0], _mm256_set1_epi64x(SymCryptSha512K[rnd + 0])); \ + Wx.ymm[rnd + 1] = _mm256_add_epi64(Wx.ymm[rnd + 1], _mm256_set1_epi64x(SymCryptSha512K[rnd + 1])); \ + Wx.ymm[rnd + 2] = _mm256_add_epi64(Wx.ymm[rnd + 2], _mm256_set1_epi64x(SymCryptSha512K[rnd + 2])); \ + Wx.ymm[rnd + 3] = _mm256_add_epi64(Wx.ymm[rnd + 3], _mm256_set1_epi64x(SymCryptSha512K[rnd + 3])); \ + CROUND_4BLOCKS(A, B, C, D, E, F, G, H, rnd + 0, 0); \ + CROUND_4BLOCKS(H, A, B, C, D, E, F, G, rnd + 1, 0); \ + CROUND_4BLOCKS(G, H, A, B, C, D, E, F, rnd + 2, 0); \ + CROUND_4BLOCKS(F, G, H, A, B, C, D, E, rnd + 3, 0); \ + Wx.ymm[rnd + 4] = _mm256_add_epi64(Wx.ymm[rnd + 4], _mm256_set1_epi64x(SymCryptSha512K[rnd + 4])); \ + Wx.ymm[rnd + 5] = _mm256_add_epi64(Wx.ymm[rnd + 5], _mm256_set1_epi64x(SymCryptSha512K[rnd + 5])); \ + Wx.ymm[rnd + 6] = _mm256_add_epi64(Wx.ymm[rnd + 6], _mm256_set1_epi64x(SymCryptSha512K[rnd + 6])); \ + Wx.ymm[rnd + 7] = _mm256_add_epi64(Wx.ymm[rnd + 7], _mm256_set1_epi64x(SymCryptSha512K[rnd + 7])); \ + CROUND_4BLOCKS(E, F, G, H, A, B, C, D, rnd + 4, 0); \ + CROUND_4BLOCKS(D, E, F, G, H, A, B, C, rnd + 5, 0); \ + CROUND_4BLOCKS(C, D, E, F, G, H, A, B, rnd + 6, 0); \ + CROUND_4BLOCKS(B, C, D, E, F, G, H, A, rnd + 7, 0); \ +} + + +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_ymm_4blocks( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE* pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T* pcbRemaining) +{ + SYMCRYPT_ALIGN_AT(32) union { UINT64 ul[16]; UINT64 ul4[80][4]; __m256i ymm[80]; } Wx; + UINT64 Wt; + UINT64 A, B, C, D, E, F, G, H; + UINT32 uWipeSize = (cbData >= (3 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE)) ? (80 * 4 * sizeof(UINT64)) : (16 * sizeof(UINT64)); + + + _mm256_zeroupper(); + + while (cbData >= (3 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE)) + { + SIZE_T numBlocks = (cbData >= 4 * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE) ? 
4 : (cbData / SYMCRYPT_SHA512_INPUT_BLOCK_SIZE); + + SHA512_MSG_LOAD_4BLOCKS(numBlocks); + SHA512_MSG_TRANSPOSE_4BLOCKS(); + + // + // Process the first block together with message expansion + // + A = pChain->H[0]; + B = pChain->H[1]; + C = pChain->H[2]; + D = pChain->H[3]; + E = pChain->H[4]; + F = pChain->H[5]; + G = pChain->H[6]; + H = pChain->H[7]; + + for (int iterCount=0; iterCount<(64/8); iterCount++) + { + const int roundBase = iterCount*8; + + SHA512_MSG_EXPAND_4BLOCKS_4ROUNDS(roundBase + 16); + CROUND_4BLOCKS(A, B, C, D, E, F, G, H, roundBase + 0, 0); + CROUND_4BLOCKS(H, A, B, C, D, E, F, G, roundBase + 1, 0); + CROUND_4BLOCKS(G, H, A, B, C, D, E, F, roundBase + 2, 0); + CROUND_4BLOCKS(F, G, H, A, B, C, D, E, roundBase + 3, 0); + + SHA512_MSG_EXPAND_4BLOCKS_4ROUNDS(roundBase + 20); + CROUND_4BLOCKS(E, F, G, H, A, B, C, D, roundBase + 4, 0); + CROUND_4BLOCKS(D, E, F, G, H, A, B, C, roundBase + 5, 0); + CROUND_4BLOCKS(C, D, E, F, G, H, A, B, roundBase + 6, 0); + CROUND_4BLOCKS(B, C, D, E, F, G, H, A, roundBase + 7, 0); + } + + // Last 16 rounds; add round constants and process. Message expansion is completed above. 
+ SHA512_4BLOCKS_FINAL_ROUNDS_8X(64); + SHA512_4BLOCKS_FINAL_ROUNDS_8X(72); + + pChain->H[0] = A + pChain->H[0]; + pChain->H[1] = B + pChain->H[1]; + pChain->H[2] = C + pChain->H[2]; + pChain->H[3] = D + pChain->H[3]; + pChain->H[4] = E + pChain->H[4]; + pChain->H[5] = F + pChain->H[5]; + pChain->H[6] = G + pChain->H[6]; + pChain->H[7] = H + pChain->H[7]; + + // Process the remaining message blocks + for (int bl = 1; bl < numBlocks; bl++) + { + A = pChain->H[0]; + B = pChain->H[1]; + C = pChain->H[2]; + D = pChain->H[3]; + E = pChain->H[4]; + F = pChain->H[5]; + G = pChain->H[6]; + H = pChain->H[7]; + + for (int iterCount=0; iterCount<(80/8); iterCount++) + { + const int roundBase = iterCount*8; + + CROUND_4BLOCKS(A, B, C, D, E, F, G, H, roundBase + 0, bl); + CROUND_4BLOCKS(H, A, B, C, D, E, F, G, roundBase + 1, bl); + CROUND_4BLOCKS(G, H, A, B, C, D, E, F, roundBase + 2, bl); + CROUND_4BLOCKS(F, G, H, A, B, C, D, E, roundBase + 3, bl); + CROUND_4BLOCKS(E, F, G, H, A, B, C, D, roundBase + 4, bl); + CROUND_4BLOCKS(D, E, F, G, H, A, B, C, roundBase + 5, bl); + CROUND_4BLOCKS(C, D, E, F, G, H, A, B, roundBase + 6, bl); + CROUND_4BLOCKS(B, C, D, E, F, G, H, A, roundBase + 7, bl); + //CROUND_4BLOCKS(A, B, C, D, E, F, G, H, roundBase + 8, bl); + //CROUND_4BLOCKS(H, A, B, C, D, E, F, G, roundBase + 9, bl); + //CROUND_4BLOCKS(G, H, A, B, C, D, E, F, roundBase + 10, bl); + //CROUND_4BLOCKS(F, G, H, A, B, C, D, E, roundBase + 11, bl); + //CROUND_4BLOCKS(E, F, G, H, A, B, C, D, roundBase + 12, bl); + //CROUND_4BLOCKS(D, E, F, G, H, A, B, C, roundBase + 13, bl); + //CROUND_4BLOCKS(C, D, E, F, G, H, A, B, roundBase + 14, bl); + //CROUND_4BLOCKS(B, C, D, E, F, G, H, A, roundBase + 15, bl); + } + + pChain->H[0] = A + pChain->H[0]; + pChain->H[1] = B + pChain->H[1]; + pChain->H[2] = C + pChain->H[2]; + pChain->H[3] = D + pChain->H[3]; + pChain->H[4] = E + pChain->H[4]; + pChain->H[5] = F + pChain->H[5]; + pChain->H[6] = G + pChain->H[6]; + pChain->H[7] = H + pChain->H[7]; + } + + 
pbData += (numBlocks * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE); + cbData -= (numBlocks * SYMCRYPT_SHA512_INPUT_BLOCK_SIZE); + } + + _mm256_zeroupper(); + + + // The vectorized version above consumes multiple blocks at a time. + // The remaining blocks if any are processed here. + while (cbData >= SYMCRYPT_SHA512_INPUT_BLOCK_SIZE) + { + A = pChain->H[0]; + B = pChain->H[1]; + C = pChain->H[2]; + D = pChain->H[3]; + E = pChain->H[4]; + F = pChain->H[5]; + G = pChain->H[6]; + H = pChain->H[7]; + + // + // initial rounds 1 to 16 + // + + IROUND(A, B, C, D, E, F, G, H, 0); + IROUND(H, A, B, C, D, E, F, G, 1); + IROUND(G, H, A, B, C, D, E, F, 2); + IROUND(F, G, H, A, B, C, D, E, 3); + IROUND(E, F, G, H, A, B, C, D, 4); + IROUND(D, E, F, G, H, A, B, C, 5); + IROUND(C, D, E, F, G, H, A, B, 6); + IROUND(B, C, D, E, F, G, H, A, 7); + IROUND(A, B, C, D, E, F, G, H, 8); + IROUND(H, A, B, C, D, E, F, G, 9); + IROUND(G, H, A, B, C, D, E, F, 10); + IROUND(F, G, H, A, B, C, D, E, 11); + IROUND(E, F, G, H, A, B, C, D, 12); + IROUND(D, E, F, G, H, A, B, C, 13); + IROUND(C, D, E, F, G, H, A, B, 14); + IROUND(B, C, D, E, F, G, H, A, 15); + + for (int iterCount=1; iterCount<(80/16); iterCount++) + { + const int roundBase = iterCount*16; + + FROUND(A, B, C, D, E, F, G, H, roundBase + 0, 0); + FROUND(H, A, B, C, D, E, F, G, roundBase + 1, 1); + FROUND(G, H, A, B, C, D, E, F, roundBase + 2, 2); + FROUND(F, G, H, A, B, C, D, E, roundBase + 3, 3); + FROUND(E, F, G, H, A, B, C, D, roundBase + 4, 4); + FROUND(D, E, F, G, H, A, B, C, roundBase + 5, 5); + FROUND(C, D, E, F, G, H, A, B, roundBase + 6, 6); + FROUND(B, C, D, E, F, G, H, A, roundBase + 7, 7); + FROUND(A, B, C, D, E, F, G, H, roundBase + 8, 8); + FROUND(H, A, B, C, D, E, F, G, roundBase + 9, 9); + FROUND(G, H, A, B, C, D, E, F, roundBase + 10, 10); + FROUND(F, G, H, A, B, C, D, E, roundBase + 11, 11); + FROUND(E, F, G, H, A, B, C, D, roundBase + 12, 12); + FROUND(D, E, F, G, H, A, B, C, roundBase + 13, 13); + FROUND(C, D, E, F, G, H, A, 
B, roundBase + 14, 14); + FROUND(B, C, D, E, F, G, H, A, roundBase + 15, 15); + } + + pChain->H[0] = A + pChain->H[0]; + pChain->H[1] = B + pChain->H[1]; + pChain->H[2] = C + pChain->H[2]; + pChain->H[3] = D + pChain->H[3]; + pChain->H[4] = E + pChain->H[4]; + pChain->H[5] = F + pChain->H[5]; + pChain->H[6] = G + pChain->H[6]; + pChain->H[7] = H + pChain->H[7]; + + pbData += SYMCRYPT_SHA512_INPUT_BLOCK_SIZE; + cbData -= SYMCRYPT_SHA512_INPUT_BLOCK_SIZE; + } + + *pcbRemaining = cbData; + + // + // Wipe the variables; + // + SymCryptWipe(&Wx, uWipeSize); +} + +#ifdef __clang__ +#pragma clang attribute pop +#else +#pragma GCC pop_options +#endif + +#endif // SYMCRYPT_CPU_AMD64 diff --git a/libs/symcrypt/lib/sha512.c b/libs/symcrypt/lib/sha512.c new file mode 100644 index 00000000000..0b763c54677 --- /dev/null +++ b/libs/symcrypt/lib/sha512.c @@ -0,0 +1,1715 @@ +// +// Sha512.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This module contains the routines to implement SHA2-512 from FIPS 180-2 +// + + +#include "precomp.h" + +// +// SHA-512 uses 80 magic constants of 64 bits each. These are +// referred to as K^{512}_i for i=0...79 by FIPS 180-2. +// We use a static array as that does not pollute the linker name space +// For performance we align to the cache line size of 64 bytes +// We have one extra value at the end to allow an XMM read from each element +// of the array. 
+// Only 80 initializers are listed below, so the extra 81st element is implicitly zero.
+SYMCRYPT_ALIGN_AT( 64 ) const UINT64 SymCryptSha512K[81] = {
+    0x428a2f98d728ae22UL, 0x7137449123ef65cdUL,
+    0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
+    0x3956c25bf348b538UL, 0x59f111f1b605d019UL,
+    0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL,
+    0xd807aa98a3030242UL, 0x12835b0145706fbeUL,
+    0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL,
+    0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL,
+    0x9bdc06a725c71235UL, 0xc19bf174cf692694UL,
+    0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL,
+    0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL,
+    0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL,
+    0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL,
+    0x983e5152ee66dfabUL, 0xa831c66d2db43210UL,
+    0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL,
+    0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL,
+    0x06ca6351e003826fUL, 0x142929670a0e6e70UL,
+    0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL,
+    0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL,
+    0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL,
+    0x81c2c92e47edaee6UL, 0x92722c851482353bUL,
+    0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL,
+    0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL,
+    0xd192e819d6ef5218UL, 0xd69906245565a910UL,
+    0xf40e35855771202aUL, 0x106aa07032bbd1b8UL,
+    0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL,
+    0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL,
+    0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL,
+    0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL,
+    0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL,
+    0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL,
+    0x90befffa23631e28UL, 0xa4506cebde82bde9UL,
+    0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL,
+    0xca273eceea26619cUL, 0xd186b8c721c0c207UL,
+    0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL,
+    0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL,
+    0x113f9804bef90daeUL, 0x1b710b35131c471bUL,
+    0x28db77f523047d84UL, 0x32caab7b40c72493UL,
+    0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL,
+    0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL,
+    0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL,
+};
+
+//
+// Initial states: one table of eight 64-bit chaining words per variant.
+// Each table is copied into pState->chain.H by the matching Init function.
+const UINT64 SymCryptSha512InitialState[8] = {
+    0x6a09e667f3bcc908UL,
+    0xbb67ae8584caa73bUL,
+    0x3c6ef372fe94f82bUL,
+    0xa54ff53a5f1d36f1UL,
+    0x510e527fade682d1UL,
+    0x9b05688c2b3e6c1fUL,
+    0x1f83d9abfb41bd6bUL,
+    0x5be0cd19137e2179UL,
+};
+
+const UINT64 SymCryptSha384InitialState[8] = {
+    0xcbbb9d5dc1059ed8UL,
+    0x629a292a367cd507UL,
+    0x9159015a3070dd17UL,
+    0x152fecd8f70e5939UL,
+    0x67332667ffc00b31UL,
+    0x8eb44a8768581511UL,
+    0xdb0c2e0d64f98fa7UL,
+    0x47b5481dbefa4fa4UL,
+};
+
+const UINT64 SymCryptSha512_224InitialState[8] = {
+    0x8c3d37c819544da2UL,
+    0x73e1996689dcd4d6UL,
+    0x1dfab7ae32ff9c82UL,
+    0x679dd514582f9fcfUL,
+    0x0f6d2b697bd44da8UL,
+    0x77e36f7304c48942UL,
+    0x3f9d85a86a1d36c8UL,
+    0x1112e6ad91d692a1UL,
+};
+
+const UINT64 SymCryptSha512_256InitialState[8] = {
+    0x22312194fc2bf72cUL,
+    0x9f555fa3c84c64c2UL,
+    0x2393b86b6f53b151UL,
+    0x963877195940eabdUL,
+    0x96283ee2a88effe3UL,
+    0xbe5e1e2553863992UL,
+    0x2b0199fc2c85b8aaUL,
+    0x0eb72ddc81c52ca2UL,
+};
+
+
+//
+// Todo: this structure pulls in the SHA-384 code anytime someone uses
+// SHA-512; should be split into a separate file.
+//
+const SYMCRYPT_HASH SymCryptSha384Algorithm_default = {
+    &SymCryptSha384Init,
+    &SymCryptSha384Append,
+    &SymCryptSha384Result,
+    &SymCryptSha512AppendBlocks,    // all four descriptors share the SHA-512 block function
+    &SymCryptSha384StateCopy,
+    sizeof( SYMCRYPT_SHA384_STATE ),
+    SYMCRYPT_SHA384_RESULT_SIZE,
+    SYMCRYPT_SHA384_INPUT_BLOCK_SIZE,
+    SYMCRYPT_FIELD_OFFSET( SYMCRYPT_SHA384_STATE, chain ),
+    SYMCRYPT_FIELD_SIZE( SYMCRYPT_SHA384_STATE, chain ),
+};
+
+const SYMCRYPT_HASH SymCryptSha512Algorithm_default = {
+    &SymCryptSha512Init,
+    &SymCryptSha512Append,
+    &SymCryptSha512Result,
+    &SymCryptSha512AppendBlocks,
+    &SymCryptSha512StateCopy,
+    sizeof( SYMCRYPT_SHA512_STATE ),
+    SYMCRYPT_SHA512_RESULT_SIZE,
+    SYMCRYPT_SHA512_INPUT_BLOCK_SIZE,
+    SYMCRYPT_FIELD_OFFSET( SYMCRYPT_SHA512_STATE, chain ),
+    SYMCRYPT_FIELD_SIZE( SYMCRYPT_SHA512_STATE, chain ),
+};
+
+const SYMCRYPT_HASH SymCryptSha512_224Algorithm_default = {
+    &SymCryptSha512_224Init,
+    &SymCryptSha512_224Append,
+    &SymCryptSha512_224Result,
+    &SymCryptSha512AppendBlocks,
+    &SymCryptSha512_224StateCopy,
+    sizeof( SYMCRYPT_SHA512_224_STATE ),
+    SYMCRYPT_SHA512_224_RESULT_SIZE,
+    SYMCRYPT_SHA512_224_INPUT_BLOCK_SIZE,
+    SYMCRYPT_FIELD_OFFSET( SYMCRYPT_SHA512_224_STATE, chain ),
+    SYMCRYPT_FIELD_SIZE( SYMCRYPT_SHA512_224_STATE, chain ),
+};
+
+const SYMCRYPT_HASH SymCryptSha512_256Algorithm_default = {
+    &SymCryptSha512_256Init,
+    &SymCryptSha512_256Append,
+    &SymCryptSha512_256Result,
+    &SymCryptSha512AppendBlocks,
+    &SymCryptSha512_256StateCopy,
+    sizeof( SYMCRYPT_SHA512_256_STATE ),
+    SYMCRYPT_SHA512_256_RESULT_SIZE,
+    SYMCRYPT_SHA512_256_INPUT_BLOCK_SIZE,
+    SYMCRYPT_FIELD_OFFSET( SYMCRYPT_SHA512_256_STATE, chain ),
+    SYMCRYPT_FIELD_SIZE( SYMCRYPT_SHA512_256_STATE, chain ),
+};
+
+const PCSYMCRYPT_HASH SymCryptSha384Algorithm = &SymCryptSha384Algorithm_default;
+const PCSYMCRYPT_HASH SymCryptSha512Algorithm = &SymCryptSha512Algorithm_default;
+const PCSYMCRYPT_HASH SymCryptSha512_224Algorithm = &SymCryptSha512_224Algorithm_default;
+const PCSYMCRYPT_HASH SymCryptSha512_256Algorithm = &SymCryptSha512_256Algorithm_default;
+
+//
+// SymCryptSha384
+// (hash_pattern.c is instantiated once per variant with ALG/Alg set accordingly)
+#define ALG SHA384
+#define Alg Sha384
+#include "hash_pattern.c"
+#undef ALG
+#undef Alg
+
+//
+// SymCryptSha512
+//
+#define ALG SHA512
+#define Alg Sha512
+#include "hash_pattern.c"
+#undef ALG
+#undef Alg
+
+//
+// SymCryptSha512/224
+//
+#define ALG SHA512_224
+#define Alg Sha512_224
+#include "hash_pattern.c"
+#undef ALG
+#undef Alg
+
+//
+// SymCryptSha512/256
+//
+#define ALG SHA512_256
+#define Alg Sha512_256
+#include "hash_pattern.c"
+#undef ALG
+#undef Alg
+
+// Init functions: set the magic, zero the length counters and buffer fill, load the variant's initial chaining value.
+
+SYMCRYPT_NOINLINE
+VOID
+SYMCRYPT_CALL
+SymCryptSha512Init( _Out_ PSYMCRYPT_SHA512_STATE pState )
+{
+    SYMCRYPT_SET_MAGIC( pState );
+
+    pState->dataLengthH = 0;
+    pState->dataLengthL = 0;
+    pState->bytesInBuffer = 0;
+
+    memcpy( &pState->chain.H[0], &SymCryptSha512InitialState[0], sizeof( SymCryptSha512InitialState ) );
+
+    //
+    // There is no need to initialize the buffer part of the state as that will be
+    // filled before it is used.
+    //
+}
+
+
+SYMCRYPT_NOINLINE
+VOID
+SYMCRYPT_CALL
+SymCryptSha384Init( _Out_ PSYMCRYPT_SHA384_STATE pState )
+{
+    SYMCRYPT_SET_MAGIC( pState );
+
+    pState->dataLengthH = 0;
+    pState->dataLengthL = 0;
+    pState->bytesInBuffer = 0;
+
+    memcpy( &pState->chain.H[0], &SymCryptSha384InitialState[0], sizeof( SymCryptSha384InitialState ) );
+
+    //
+    // There is no need to initialize the buffer part of the state as that will be
+    // filled before it is used.
+    //
+}
+
+
+SYMCRYPT_NOINLINE
+VOID
+SYMCRYPT_CALL
+SymCryptSha512_224Init( _Out_ PSYMCRYPT_SHA512_224_STATE pState )
+{
+    SYMCRYPT_SET_MAGIC( pState );
+
+    pState->dataLengthH = 0;
+    pState->dataLengthL = 0;
+    pState->bytesInBuffer = 0;
+
+    memcpy( &pState->chain.H[0], &SymCryptSha512_224InitialState[0], sizeof( SymCryptSha512_224InitialState ) );
+
+    //
+    // There is no need to initialize the buffer part of the state as that will be
+    // filled before it is used.
+    //
+}
+
+
+SYMCRYPT_NOINLINE
+VOID
+SYMCRYPT_CALL
+SymCryptSha512_256Init( _Out_ PSYMCRYPT_SHA512_256_STATE pState )
+{
+    SYMCRYPT_SET_MAGIC( pState );
+
+    pState->dataLengthH = 0;
+    pState->dataLengthL = 0;
+    pState->bytesInBuffer = 0;
+
+    memcpy( &pState->chain.H[0], &SymCryptSha512_256InitialState[0], sizeof( SymCryptSha512_256InitialState ) );
+
+    //
+    // There is no need to initialize the buffer part of the state as that will be
+    // filled before it is used.
+    //
+}
+
+// Append: adds cbData to the 128-bit length counter, hashes every complete 128-byte block, buffers the remainder.
+
+SYMCRYPT_NOINLINE
+VOID
+SYMCRYPT_CALL
+SymCryptSha512Append(
+    _Inout_                 PSYMCRYPT_SHA512_STATE  pState,
+    _In_reads_( cbData )    PCBYTE                  pbData,
+                            SIZE_T                  cbData )
+{
+    UINT32  bytesInBuffer;
+    UINT32  freeInBuffer;
+    SIZE_T  tmp;
+
+    SYMCRYPT_CHECK_MAGIC( pState );
+
+    pState->dataLengthL += cbData;
+    if( pState->dataLengthL < cbData ) {    // low word wrapped around: carry into the high word
+        pState->dataLengthH++;
+    }
+
+    bytesInBuffer = pState->bytesInBuffer;
+
+    //
+    // If previous data in buffer, buffer new input and transform if possible.
+    //
+    if( bytesInBuffer > 0 )
+    {
+        SYMCRYPT_ASSERT( SYMCRYPT_SHA512_INPUT_BLOCK_SIZE > bytesInBuffer );
+
+        freeInBuffer = SYMCRYPT_SHA512_INPUT_BLOCK_SIZE - bytesInBuffer;
+        if( cbData < freeInBuffer )
+        {
+            //
+            // All the data will fit in the buffer.
+            // We don't do anything here.
+            // As cbData < inputBlockSize the bulk data processing is skipped,
+            // and the data will be copied to the buffer at the end
+            // of this code.
+        } else {
+            //
+            // Enough data to fill the whole buffer & process it
+            //
+            memcpy(&pState->buffer[bytesInBuffer], pbData, freeInBuffer);
+            pbData += freeInBuffer;
+            cbData -= freeInBuffer;
+            SymCryptSha512AppendBlocks( &pState->chain, &pState->buffer[0], SYMCRYPT_SHA512_INPUT_BLOCK_SIZE, &tmp );
+
+            bytesInBuffer = 0;
+        }
+    }
+
+    //
+    // Internal buffer is empty; process all remaining whole blocks in the input
+    //
+    if( cbData >= SYMCRYPT_SHA512_INPUT_BLOCK_SIZE )
+    {
+        SymCryptSha512AppendBlocks( &pState->chain, pbData, cbData, &tmp );
+        SYMCRYPT_ASSERT( tmp < SYMCRYPT_SHA512_INPUT_BLOCK_SIZE );
+        pbData += cbData - tmp;     // skip the bytes that were consumed; tmp bytes are left over
+        cbData = tmp;
+    }
+
+    SYMCRYPT_ASSERT( cbData < SYMCRYPT_SHA512_INPUT_BLOCK_SIZE );
+
+    //
+    // buffer remaining input if necessary.
+    //
+    if( cbData > 0 )
+    {
+        memcpy( &pState->buffer[bytesInBuffer], pbData, cbData );
+        bytesInBuffer += (UINT32) cbData;
+    }
+
+    pState->bytesInBuffer = bytesInBuffer;
+
+}
+
+SYMCRYPT_NOINLINE
+VOID
+SYMCRYPT_CALL
+SymCryptSha384Append(
+    _Inout_                 PSYMCRYPT_SHA384_STATE  pState,
+    _In_reads_( cbData )    PCBYTE                  pbData,
+                            SIZE_T                  cbData )
+{
+    // The variant Append functions forward to SymCryptSha512Append with the state pointer cast.
+    SymCryptSha512Append( (PSYMCRYPT_SHA512_STATE)pState, pbData, cbData );
+
+}
+
+SYMCRYPT_NOINLINE
+VOID
+SYMCRYPT_CALL
+SymCryptSha512_224Append(
+    _Inout_                 PSYMCRYPT_SHA512_224_STATE  pState,
+    _In_reads_( cbData )    PCBYTE                      pbData,
+                            SIZE_T                      cbData )
+{
+    SymCryptSha512Append( (PSYMCRYPT_SHA512_STATE)pState, pbData, cbData );
+}
+
+SYMCRYPT_NOINLINE
+VOID
+SYMCRYPT_CALL
+SymCryptSha512_256Append(
+    _Inout_                 PSYMCRYPT_SHA512_256_STATE  pState,
+    _In_reads_( cbData )    PCBYTE                      pbData,
+                            SIZE_T                      cbData )
+{
+    SymCryptSha512Append( (PSYMCRYPT_SHA512_STATE)pState, pbData, cbData );
+}
+
+// Result: pads the buffered data, hashes the final block(s), writes the digest, then wipes and resets the state.
+SYMCRYPT_NOINLINE
+VOID
+SYMCRYPT_CALL
+SymCryptSha512Result(
+    _Inout_                                     PSYMCRYPT_SHA512_STATE  pState,
+    _Out_writes_( SYMCRYPT_SHA512_RESULT_SIZE ) PBYTE                   pbResult )
+{
+    UINT32 bytesInBuffer;
+    SIZE_T tmp;
+
+    SYMCRYPT_CHECK_MAGIC( pState );
+
+    bytesInBuffer = pState->bytesInBuffer;
+
+    //
+    // The buffer is never completely full, so we can always put the first
+    // padding byte in.
+    //
+    pState->buffer[bytesInBuffer++] = 0x80;
+
+    if( bytesInBuffer > 128-16 ) {
+        //
+        // No room for the rest of the padding. Pad with zeroes & process block
+        // bytesInBuffer is at most 128, so we do not have an integer underflow
+        //
+        SymCryptWipe( &pState->buffer[bytesInBuffer], 128-bytesInBuffer );
+        SymCryptSha512AppendBlocks( &pState->chain, pState->buffer, 128, &tmp );
+        bytesInBuffer = 0;
+    }
+
+    //
+    // Set rest of padding
+    // We wipe to the end of the buffer as it is 16-aligned,
+    // and it is faster to wipe to an aligned point
+    // The last 16 bytes then receive the message length in bits (byte count << 3), most significant byte first.
+    SymCryptWipe( &pState->buffer[bytesInBuffer], 128-bytesInBuffer );
+    SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[128-16], (pState->dataLengthH << 3) + (pState->dataLengthL >> 61) );
+    SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[128- 8], (pState->dataLengthL << 3) );
+
+    SymCryptSha512AppendBlocks( &pState->chain, pState->buffer, 128, &tmp );
+
+    SymCryptUint64ToMsbFirst( &pState->chain.H[0], pbResult, 8 );
+
+    //
+    // We have to wipe the whole state because the Init call
+    // might be optimized away by a smart compiler.
+    //
+    SymCryptWipeKnownSize( pState, sizeof( *pState ) );
+
+    SYMCRYPT_SET_MAGIC( pState );
+
+    memcpy( &pState->chain.H[0], &SymCryptSha512InitialState[0], sizeof( SymCryptSha512InitialState ) );
+}
+
+SYMCRYPT_NOINLINE
+VOID
+SYMCRYPT_CALL
+SymCryptSha384Result(
+    _Inout_                                     PSYMCRYPT_SHA384_STATE  pState,
+    _Out_writes_( SYMCRYPT_SHA384_RESULT_SIZE ) PBYTE                   pbResult )
+{
+    //
+    // For simplicity we re-use SymCryptSha512Result. This is slightly slower,
+    // but SHA-384 isn't used that much.
+    //
+    SYMCRYPT_ALIGN BYTE sha512Result[SYMCRYPT_SHA512_RESULT_SIZE];      // Buffer for SHA-512 output
+
+    //
+    // The SHA-384 result is the first 48 bytes of the SHA-512 result of our state
+    //
+    SymCryptSha512Result( (PSYMCRYPT_SHA512_STATE)pState, sha512Result );
+    memcpy( pbResult, sha512Result, SYMCRYPT_SHA384_RESULT_SIZE );
+
+    //
+    // The buffer was already wiped by the SymCryptSha512Result function, we
+    // just have to re-initialize for SHA-384
+    //
+    SymCryptSha384Init( pState );
+
+    SymCryptWipeKnownSize( sha512Result, sizeof( sha512Result ) );
+}
+
+
+SYMCRYPT_NOINLINE
+VOID
+SYMCRYPT_CALL
+SymCryptSha512_224Result(
+    _Inout_                                         PSYMCRYPT_SHA512_224_STATE  pState,
+    _Out_writes_( SYMCRYPT_SHA512_224_RESULT_SIZE ) PBYTE                       pbResult )
+{
+    SYMCRYPT_ALIGN BYTE sha512Result[SYMCRYPT_SHA512_RESULT_SIZE];      // Buffer for SHA-512 output
+
+    //
+    // The SHA-512/224 result is the first 28 bytes of the SHA-512 result of our state
+    //
+    SymCryptSha512Result( (PSYMCRYPT_SHA512_STATE)pState, sha512Result );
+    memcpy( pbResult, sha512Result, SYMCRYPT_SHA512_224_RESULT_SIZE );
+
+    //
+    // The buffer was already wiped by the SymCryptSha512Result function, we
+    // just have to re-initialize for SHA-512/224
+    //
+    SymCryptSha512_224Init( pState );
+
+    SymCryptWipeKnownSize( sha512Result, sizeof( sha512Result ) );
+}
+
+
+SYMCRYPT_NOINLINE
+VOID
+SYMCRYPT_CALL
+SymCryptSha512_256Result(
+    _Inout_                                         PSYMCRYPT_SHA512_256_STATE  pState,
+    _Out_writes_( SYMCRYPT_SHA512_256_RESULT_SIZE ) PBYTE                       pbResult )
+{
+    SYMCRYPT_ALIGN BYTE sha512Result[SYMCRYPT_SHA512_RESULT_SIZE];      // Buffer for SHA-512 output
+
+    //
+    // The SHA-512/256 result is the first 32 bytes of the SHA-512 result of our state
+    //
+    SymCryptSha512Result( (PSYMCRYPT_SHA512_STATE)pState, sha512Result );
+    memcpy( pbResult, sha512Result, SYMCRYPT_SHA512_256_RESULT_SIZE );
+
+    //
+    // The buffer was already wiped by the SymCryptSha512Result function, we
+    // just have to re-initialize for SHA-512/256
+    //
+    SymCryptSha512_256Init( pState );
+
+    SymCryptWipeKnownSize( sha512Result, sizeof( sha512Result ) );
+}
+
+// State export: serializes chain, length counters and buffered bytes into a checksummed blob tagged with 'type'.
+VOID
+SYMCRYPT_CALL
+SymCryptSha512StateExportCore(
+    _In_                                                    PCSYMCRYPT_SHA512_STATE pState,
+    _Out_writes_bytes_( SYMCRYPT_SHA512_STATE_EXPORT_SIZE ) PBYTE                   pbBlob,
+    _In_                                                    UINT32                  type )
+{
+    SYMCRYPT_ALIGN SYMCRYPT_SHA512_STATE_EXPORT_BLOB    blob;           // local copy to have proper alignment.
+    C_ASSERT( sizeof( blob ) == SYMCRYPT_SHA512_STATE_EXPORT_SIZE );
+
+    SYMCRYPT_CHECK_MAGIC( pState );
+
+    SymCryptWipeKnownSize( &blob, sizeof( blob ) ); // wipe to avoid any data leakage
+
+    blob.header.magic = SYMCRYPT_BLOB_MAGIC;
+    blob.header.size = SYMCRYPT_SHA512_STATE_EXPORT_SIZE;
+    blob.header.type = type;
+
+    //
+    // Copy the relevant data. Buffer will be 0-padded.
+    // The low 7 bits of dataLengthL give the number of bytes currently buffered.
+
+    SymCryptUint64ToMsbFirst( &pState->chain.H[0], &blob.chain[0], 8 );
+    blob.dataLengthL = pState->dataLengthL;
+    blob.dataLengthH = pState->dataLengthH;
+    memcpy( &blob.buffer[0], &pState->buffer[0], blob.dataLengthL & 0x7f );
+
+    SYMCRYPT_ASSERT( (PCBYTE) &blob + sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ) == (PCBYTE) &blob.trailer );
+    SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), &blob.trailer.checksum[0] );
+
+    memcpy( pbBlob, &blob, sizeof( blob ) );
+
+//cleanup:
+    SymCryptWipeKnownSize( &blob, sizeof( blob ) );
+    return;
+}
+
+VOID
+SYMCRYPT_CALL
+SymCryptSha512StateExport(
+    _In_                                                    PCSYMCRYPT_SHA512_STATE pState,
+    _Out_writes_bytes_( SYMCRYPT_SHA512_STATE_EXPORT_SIZE ) PBYTE                   pbBlob )
+{
+    SymCryptSha512StateExportCore( pState, pbBlob, SymCryptBlobTypeSha512State );
+}
+
+VOID
+SYMCRYPT_CALL
+SymCryptSha384StateExport(
+    _In_                                                    PCSYMCRYPT_SHA384_STATE pState,
+    _Out_writes_bytes_( SYMCRYPT_SHA384_STATE_EXPORT_SIZE ) PBYTE                   pbBlob )
+{
+    SymCryptSha512StateExportCore( (PCSYMCRYPT_SHA512_STATE)pState, pbBlob, SymCryptBlobTypeSha384State );
+}
+
+VOID
+SYMCRYPT_CALL
+SymCryptSha512_224StateExport(
+    _In_                                                        PCSYMCRYPT_SHA512_224_STATE pState,
+    _Out_writes_bytes_( SYMCRYPT_SHA512_224_STATE_EXPORT_SIZE ) PBYTE                       pbBlob )
+{
+    SymCryptSha512StateExportCore( (PCSYMCRYPT_SHA512_STATE)pState, pbBlob, SymCryptBlobTypeSha512_224State );
+}
+
+VOID
+SYMCRYPT_CALL
+SymCryptSha512_256StateExport(
+    _In_                                                        PCSYMCRYPT_SHA512_256_STATE pState,
+    _Out_writes_bytes_( SYMCRYPT_SHA512_256_STATE_EXPORT_SIZE ) PBYTE                       pbBlob )
+{
+    SymCryptSha512StateExportCore( (PCSYMCRYPT_SHA512_STATE)pState, pbBlob, SymCryptBlobTypeSha512_256State );
+}
+
+// State import: returns SYMCRYPT_INVALID_BLOB if the header (magic, size, type) or the checksum does not match.
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSha512StateImportCore(
+    _Out_                                                   PSYMCRYPT_SHA512_STATE  pState,
+    _In_reads_bytes_( SYMCRYPT_SHA512_STATE_EXPORT_SIZE)    PCBYTE                  pbBlob,
+    _In_                                                    UINT32                  type )
+{
+    SYMCRYPT_ERROR                      scError = SYMCRYPT_NO_ERROR;
+    SYMCRYPT_ALIGN SYMCRYPT_SHA512_STATE_EXPORT_BLOB    blob;                       // local copy to have proper alignment.
+    BYTE                                checksum[8];
+
+    C_ASSERT( sizeof( blob ) == SYMCRYPT_SHA512_STATE_EXPORT_SIZE );
+    memcpy( &blob, pbBlob, sizeof( blob ) );
+
+    if( blob.header.magic != SYMCRYPT_BLOB_MAGIC ||
+        blob.header.size != SYMCRYPT_SHA512_STATE_EXPORT_SIZE ||
+        blob.header.type != type )
+    {
+        scError = SYMCRYPT_INVALID_BLOB;
+        goto cleanup;
+    }
+
+    SymCryptMarvin32( SymCryptMarvin32DefaultSeed, (PCBYTE) &blob, sizeof( blob ) - sizeof( SYMCRYPT_BLOB_TRAILER ), checksum );
+    if( memcmp( checksum, &blob.trailer.checksum[0], 8 ) != 0 )
+    {
+        scError = SYMCRYPT_INVALID_BLOB;
+        goto cleanup;
+    }
+
+    SymCryptMsbFirstToUint64( &blob.chain[0], &pState->chain.H[0], 8 );
+    pState->dataLengthL = blob.dataLengthL;
+    pState->dataLengthH = blob.dataLengthH;
+    pState->bytesInBuffer = blob.dataLengthL & 0x7f;    // always < 128, so the copy below stays inside the buffer
+    memcpy( &pState->buffer[0], &blob.buffer[0], pState->bytesInBuffer );
+
+    SYMCRYPT_SET_MAGIC( pState );
+
+cleanup:
+    SymCryptWipeKnownSize( &blob, sizeof(blob) );
+    return scError;
+}
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSha512StateImport(
+    _Out_                                                   PSYMCRYPT_SHA512_STATE  pState,
+    _In_reads_bytes_( SYMCRYPT_SHA512_STATE_EXPORT_SIZE)    PCBYTE                  pbBlob )
+{
+    return SymCryptSha512StateImportCore( pState, pbBlob, SymCryptBlobTypeSha512State );
+}
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSha384StateImport(
+    _Out_                                                   PSYMCRYPT_SHA384_STATE  pState,
+    _In_reads_bytes_( SYMCRYPT_SHA384_STATE_EXPORT_SIZE)    PCBYTE                  pbBlob )
+{
+    return SymCryptSha512StateImportCore( (PSYMCRYPT_SHA512_STATE)pState, pbBlob, SymCryptBlobTypeSha384State );
+}
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSha512_224StateImport(
+    _Out_                                                       PSYMCRYPT_SHA512_224_STATE  pState,
+    _In_reads_bytes_( SYMCRYPT_SHA512_224_STATE_EXPORT_SIZE)    PCBYTE                      pbBlob )
+{
+    return SymCryptSha512StateImportCore( (PSYMCRYPT_SHA512_STATE)pState, pbBlob, SymCryptBlobTypeSha512_224State );
+}
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSha512_256StateImport(
+    _Out_                                                       PSYMCRYPT_SHA512_256_STATE  pState,
+    _In_reads_bytes_( SYMCRYPT_SHA512_256_STATE_EXPORT_SIZE)    PCBYTE                      pbBlob )
+{
+    return SymCryptSha512StateImportCore( (PSYMCRYPT_SHA512_STATE)pState, pbBlob, SymCryptBlobTypeSha512_256State );
+}
+
+
+//
+// A simple test case intended for module testing for
+// FIPS compliance.
+// This is the one-block example message from FIPS 180-2 appendix C
+// Each self-test hashes SymCryptTestMsg3 and calls SymCryptFatal if the digest differs from the stored answer.
+
+const BYTE SymCryptSha512KATAnswer[64] =
+{
+    0xdd, 0xaf, 0x35, 0xa1, 0x93, 0x61, 0x7a, 0xba,
+    0xcc, 0x41, 0x73, 0x49, 0xae, 0x20, 0x41, 0x31,
+    0x12, 0xe6, 0xfa, 0x4e, 0x89, 0xa9, 0x7e, 0xa2,
+    0x0a, 0x9e, 0xee, 0xe6, 0x4b, 0x55, 0xd3, 0x9a,
+    0x21, 0x92, 0x99, 0x2a, 0x27, 0x4f, 0xc1, 0xa8,
+    0x36, 0xba, 0x3c, 0x23, 0xa3, 0xfe, 0xeb, 0xbd,
+    0x45, 0x4d, 0x44, 0x23, 0x64, 0x3c, 0xe8, 0x0e,
+    0x2a, 0x9a, 0xc9, 0x4f, 0xa5, 0x4c, 0xa4, 0x9f,
+};
+
+VOID
+SYMCRYPT_CALL
+SymCryptSha512Selftest(void)
+{
+    BYTE result[SYMCRYPT_SHA512_RESULT_SIZE];
+
+    SymCryptSha512( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result );
+
+    SymCryptInjectError( result, sizeof( result ) );
+
+    if( memcmp( result, SymCryptSha512KATAnswer, sizeof( result ) ) != 0 ) {
+        SymCryptFatal( 'SH51' );
+    }
+}
+
+//
+// A simple test case intended for module testing for
+// FIPS compliance.
+// This is the one-block example message from FIPS 180-2 appendix D
+//
+
+const BYTE SymCryptSha384KATAnswer[ 48 ] =
+{
+    0xcb, 0x00, 0x75, 0x3f, 0x45, 0xa3, 0x5e, 0x8b,
+    0xb5, 0xa0, 0x3d, 0x69, 0x9a, 0xc6, 0x50, 0x07,
+    0x27, 0x2c, 0x32, 0xab, 0x0e, 0xde, 0xd1, 0x63,
+    0x1a, 0x8b, 0x60, 0x5a, 0x43, 0xff, 0x5b, 0xed,
+    0x80, 0x86, 0x07, 0x2b, 0xa1, 0xe7, 0xcc, 0x23,
+    0x58, 0xba, 0xec, 0xa1, 0x34, 0xc8, 0x25, 0xa7,
+};
+
+VOID
+SYMCRYPT_CALL
+SymCryptSha384Selftest(void)
+{
+    BYTE result[SYMCRYPT_SHA384_RESULT_SIZE];
+
+    SymCryptSha384( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result );
+
+    SymCryptInjectError( result, sizeof( result ) );
+
+    if( memcmp( result, SymCryptSha384KATAnswer, sizeof( result ) ) != 0 ) {
+        SymCryptFatal( 'SH38' );
+    }
+}
+
+//
+// Simple test vector for FIPS module testing
+// (SHA-512/224 digest of SymCryptTestMsg3)
+
+const BYTE SymCryptSha512_224KATAnswer[ 28 ] =
+{
+    0x46, 0x34, 0x27, 0x0f, 0x70, 0x7b, 0x6a, 0x54,
+    0xda, 0xae, 0x75, 0x30, 0x46, 0x08, 0x42, 0xe2,
+    0x0e, 0x37, 0xed, 0x26, 0x5c, 0xee, 0xe9, 0xa4,
+    0x3e, 0x89, 0x24, 0xaa,
+};
+
+VOID
+SYMCRYPT_CALL
+SymCryptSha512_224Selftest(void)
+{
+    BYTE result[SYMCRYPT_SHA512_224_RESULT_SIZE];
+
+    SymCryptSha512_224( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result );
+
+    SymCryptInjectError( result, sizeof( result ) );
+
+    if( memcmp( result, SymCryptSha512_224KATAnswer, sizeof( result ) ) != 0 ) {
+        SymCryptFatal( 'SH51' );    // NOTE(review): same fatal code as the SHA-512 self-test, so the two failures look alike
+    }
+}
+
+//
+// Simple test vector for FIPS module testing
+// (SHA-512/256 digest of SymCryptTestMsg3)
+
+const BYTE SymCryptSha512_256KATAnswer[ 32 ] =
+{
+    0x53, 0x04, 0x8e, 0x26, 0x81, 0x94, 0x1e, 0xf9,
+    0x9b, 0x2e, 0x29, 0xb7, 0x6b, 0x4c, 0x7d, 0xab,
+    0xe4, 0xc2, 0xd0, 0xc6, 0x34, 0xfc, 0x6d, 0x46,
+    0xe0, 0xe2, 0xf1, 0x31, 0x07, 0xe7, 0xaf, 0x23,
+};
+
+VOID
+SYMCRYPT_CALL
+SymCryptSha512_256Selftest(void)
+{
+    BYTE result[SYMCRYPT_SHA512_256_RESULT_SIZE];
+
+    SymCryptSha512_256( SymCryptTestMsg3, sizeof( SymCryptTestMsg3 ), result );
+
+    SymCryptInjectError( result, sizeof( result ) );
+
+    if( memcmp( result, SymCryptSha512_256KATAnswer, sizeof( result ) ) != 0 ) {
+        SymCryptFatal( 'SH51' );    // NOTE(review): same fatal code as the SHA-512 self-test, so the two failures look alike
+    }
+}
+
+//
+// We keep multiple implementations in this file.
+// This allows us to switch different platforms to different implementations, whichever
+// is faster. Even if we don't use a particular implementation in one release,
+// we keep it around in case it becomes the preferred one for a new CPU release.
+// (Performance can change a lot with changes in micro-architecture.)
+//
+
+//===================================================================================
+// Implementation of compression function using UINT64s
+//
+
+//
+// For documentation on these functions see FIPS 180-2
+//
+// MAJ and CH are the functions Maj and Ch from the standard.
+// CSIGMA0 and CSIGMA1 are the capital sigma functions.
+// LSIGMA0 and LSIGMA1 are the lowercase sigma functions.
+//
+// The canonical definitions of the MAJ and CH functions are:
+//#define MAJ( x, y, z )    (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+//#define CH( x, y, z )  (((x) & (y)) ^ ((~(x)) & (z)))
+// We use optimized versions defined below
+// (bitwise-equivalent forms that need one operation fewer each)
+#define MAJ( x, y, z )  ((((z) | (y)) & (x) ) | ((z) & (y)))
+#define CH( x, y, z )  ((((z) ^ (y)) & (x)) ^ (z))
+
+//
+// The four Sigma functions
+// The commented-out forms are the FIPS definitions; the active ones factor out a shared rotation.
+
+//#define CSIGMA0( x )    (ROR64((x), 28) ^ ROR64((x), 34) ^ ROR64((x), 39))
+//#define CSIGMA1( x )    (ROR64((x), 14) ^ ROR64((x), 18) ^ ROR64((x), 41))
+//#define LSIGMA0( x )    (ROR64((x),  1) ^ ROR64((x),  8) ^ ((x)>> 7))
+//#define LSIGMA1( x )    (ROR64((x), 19) ^ ROR64((x), 61) ^ ((x)>> 6))
+
+#define CSIGMA0( x )    (ROR64((ROR64((x), 6) ^ ROR64((x), 11) ^ (x)), 28))
+#define CSIGMA1( x )    (ROR64((ROR64((x), 4) ^ ROR64((x), 27) ^ (x)), 14))
+#define LSIGMA0( x )    (ROR64((x) ^ ROR64((x), 7), 1) ^ ((x)>> 7))
+#define LSIGMA1( x )    (ROR64((x) ^ ROR64((x), 42), 19) ^ ((x)>> 6))
+
+
+
+//
+// The values a-h were stored in an array called ah.
+// We have unrolled the loop 16 times. This makes both the indices into
+// the ah array constant, and it makes the message addressing constant.
+// This provides a significant speed improvement, at the cost of making
+// the main loop about 4 kB in code.
+//
+// Initial round; r16 is the round number mod 16
+// ah[ r16   &7] = h
+// ah[(r16+1)&7] = g;
+// ah[(r16+2)&7] = f;
+// ah[(r16+3)&7] = e;
+// ah[(r16+4)&7] = d;
+// ah[(r16+5)&7] = c;
+// ah[(r16+6)&7] = b;
+// ah[(r16+7)&7] = a;
+//
+// Unfortunately, the compiler seems to choke on this, allocating an extra variable for
+// each of the array indices, with duplicate stores to both locations.
+//
+
+//
+// The core round, after the message word has been computed for this round and put in Wt.
+// r16 is the round number modulo 16. (Static after loop unrolling)
+// r is the round number
+#define CROUND( a, b, c, d, e, f, g, h, r, r16 ) {;\
+    W[r16] = Wt; \
+    h += CSIGMA1(e) + CH(e, f, g) + SymCryptSha512K[r] + Wt;\
+    d += h;\
+    h += CSIGMA0(a) + MAJ(a, b, c);\
+}
+
+//
+// Initial round that reads the message.
+// r is the round number 0..15
+//
+#define IROUND( a, b, c, d, e, f, g, h, r ) {\
+    Wt = SYMCRYPT_LOAD_MSBFIRST64( &pbData[ 8*r ] );\
+    CROUND( a, b, c, d, e, f, g, h, r, r);\
+    }
+//
+// Subsequent rounds.
+// r is the round number, r16 is the round number mod 16.
+// These are separate as typically r is run-time and r16 is compile time constant.
+//
+#define FROUND( a, b, c, d, e, f, g, h, r, r16 ) { \
+    Wt = LSIGMA1( W[(r16-2) & 15] ) + W[(r16-7) & 15] + \
+        LSIGMA0( W[(r16-15) & 15]) + W[r16 & 15]; \
+    CROUND( a, b, c, d, e, f, g, h, r, r16 ); \
+    }
+
+//
+// This is the core routine that does the actual hard work
+// This is based on the older one in RSA32LIB by Scott Field from 2001
+// It consumes whole 128-byte blocks from pbData and stores the leftover byte count in *pcbRemaining.
+VOID
+SYMCRYPT_CALL
+SymCryptSha512AppendBlocks_ull(
+    _Inout_                 SYMCRYPT_SHA512_CHAINING_STATE *    pChain,
+    _In_reads_(cbData)      PCBYTE                              pbData,
+                            SIZE_T                              cbData,
+    _Out_                   SIZE_T                            * pcbRemaining )
+{
+    SYMCRYPT_ALIGN UINT64 W[16];    // rolling window holding the 16 most recent message-schedule words
+    UINT64 A, B, C, D, E, F, G, H;
+    int round;
+    UINT64 Wt;
+
+
+    while( cbData >= 128 )
+    {
+        A = pChain->H[0];
+        B = pChain->H[1];
+        C = pChain->H[2];
+        D = pChain->H[3];
+        E = pChain->H[4];
+        F = pChain->H[5];
+        G = pChain->H[6];
+        H = pChain->H[7];
+
+        //
+        // initial rounds 1 to 16
+        // (the argument list rotates by one position per round instead of moving the values)
+
+        IROUND( A, B, C, D, E, F, G, H,  0 );
+        IROUND( H, A, B, C, D, E, F, G,  1 );
+        IROUND( G, H, A, B, C, D, E, F,  2 );
+        IROUND( F, G, H, A, B, C, D, E,  3 );
+        IROUND( E, F, G, H, A, B, C, D,  4 );
+        IROUND( D, E, F, G, H, A, B, C,  5 );
+        IROUND( C, D, E, F, G, H, A, B,  6 );
+        IROUND( B, C, D, E, F, G, H, A,  7 );
+        IROUND( A, B, C, D, E, F, G, H,  8 );
+        IROUND( H, A, B, C, D, E, F, G,  9 );
+        IROUND( G, H, A, B, C, D, E, F, 10 );
+        IROUND( F, G, H, A, B, C, D, E, 11 );
+        IROUND( E, F, G, H, A, B, C, D, 12 );
+        IROUND( D, E, F, G, H, A, B, C, 13 );
+        IROUND( C, D, E, F, G, H, A, B, 14 );
+        IROUND( B, C, D, E, F, G, H, A, 15 );
+
+        for( round=16; round<80; round += 16 )
+        {
+            FROUND( A, B, C, D, E, F, G, H, round +  0,  0 );
+            FROUND( H, A, B, C, D, E, F, G, round +  1,  1 );
+            FROUND( G, H, A, B, C, D, E, F, round +  2,  2 );
+            FROUND( F, G, H, A, B, C, D, E, round +  3,  3 );
+            FROUND( E, F, G, H, A, B, C, D, round +  4,  4 );
+            FROUND( D, E, F, G, H, A, B, C, round +  5,  5 );
+            FROUND( C, D, E, F, G, H, A, B, round +  6,  6 );
+            FROUND( B, C, D, E, F, G, H, A, round +  7,  7 );
+            FROUND( A, B, C, D, E, F, G, H, round +  8,  8 );
+            FROUND( H, A, B, C, D, E, F, G, round +  9,  9 );
+            FROUND( G, H, A, B, C, D, E, F, round + 10, 10 );
+            FROUND( F, G, H, A, B, C, D, E, round + 11, 11 );
+            FROUND( E, F, G, H, A, B, C, D, round + 12, 12 );
+            FROUND( D, E, F, G, H, A, B, C, round + 13, 13 );
+            FROUND( C, D, E, F, G, H, A, B, round + 14, 14 );
+            FROUND( B, C, D, E, F, G, H, A, round + 15, 15 );
+        }
+
+        pChain->H[0] = A + pChain->H[0];
+        pChain->H[1] = B + pChain->H[1];
+        pChain->H[2] = C + pChain->H[2];
+        pChain->H[3] = D + pChain->H[3];
+        pChain->H[4] = E + pChain->H[4];
+        pChain->H[5] = F + pChain->H[5];
+        pChain->H[6] = G + pChain->H[6];
+        pChain->H[7] = H + pChain->H[7];
+
+        pbData += 128;
+        cbData -= 128;
+    }
+
+    *pcbRemaining = cbData;
+
+    //
+    // Wipe the variables;
+    //
+    SymCryptWipeKnownSize( W, sizeof( W ) );
+    SYMCRYPT_FORCE_WRITE64( &A, 0 );
+    SYMCRYPT_FORCE_WRITE64( &B, 0 );
+    SYMCRYPT_FORCE_WRITE64( &C, 0 );
+    SYMCRYPT_FORCE_WRITE64( &D, 0 );
+    SYMCRYPT_FORCE_WRITE64( &E, 0 );
+    SYMCRYPT_FORCE_WRITE64( &F, 0 );
+    SYMCRYPT_FORCE_WRITE64( &G, 0 );
+    SYMCRYPT_FORCE_WRITE64( &H, 0 );
+    SYMCRYPT_FORCE_WRITE64( &Wt, 0 );
+}
+
+//
+// UINT64 based implementation that
+// first computes the expanded message, and then the
+// actual hash computation.
+// It tries to use fewer registers; this is probably a good approach for CPUs with only 8
+// 64-bit registers; which is what you would use on x86 XMM, but we have XMM code below.
+// This uses more memory, but might allow better register re-use and thereby
+// reduce the number of load/stores.
+// The state words e..h live in memory directly below W, so W[r-8 .. r-5] addresses them during the rounds.
+
+VOID
+SYMCRYPT_CALL
+SymCryptSha512AppendBlocks_ull2(
+    _Inout_                 SYMCRYPT_SHA512_CHAINING_STATE *    pChain,
+    _In_reads_(cbData)      PCBYTE                              pbData,
+                            SIZE_T                              cbData,
+    _Out_                   SIZE_T                            * pcbRemaining )
+{
+    SYMCRYPT_ALIGN UINT64 buf[4 + 8 + 80];  // buf[0..3]: saved d,c,b,a for the feed-forward; buf[4..11]: state (ha); buf[12..91]: expanded input block (W)
+    UINT64 * W = &buf[4 + 8];
+    UINT64 * ha = &buf[4];                  // initial state words, in order h, g, ..., b, a
+    UINT64 A, B, C, D, T;
+    int r;
+
+    ha[7] = pChain->H[0]; buf[3] = ha[7];
+    ha[6] = pChain->H[1]; buf[2] = ha[6];
+    ha[5] = pChain->H[2]; buf[1] = ha[5];
+    ha[4] = pChain->H[3]; buf[0] = ha[4];
+    ha[3] = pChain->H[4];
+    ha[2] = pChain->H[5];
+    ha[1] = pChain->H[6];
+    ha[0] = pChain->H[7];
+
+    while( cbData >= 128 )
+    {
+
+        //
+        // Capture the input into W[0..15]
+        //
+        for( r=0; r<16; r+= 2 )
+        {
+            W[r  ] = SYMCRYPT_LOAD_MSBFIRST64( &pbData[ 8* r    ] );
+            W[r+1] = SYMCRYPT_LOAD_MSBFIRST64( &pbData[ 8*(r+1) ] );
+        }
+
+        //
+        // Expand the message
+        //
+        A = W[15];
+        B = W[14];
+        D = W[0];
+        for( r=16; r<80; r+= 2 )
+        {
+            // Loop invariant: A=W[r-1], B = W[r-2], D = W[r-16]
+
+            //
+            // Macro for one word of message expansion.
+            // Invariant:
+            // on entry: a = W[r-1], b = W[r-2], d = W[r-16]
+            // on exit:  W[r] computed, a = W[r-1], b = W[r], c = W[r-15]
+            //
+            #define EXPAND( a, b, c, d, r ) \
+                        c = W[r-15]; \
+                        b = d + LSIGMA1( b ) + W[r-7] + LSIGMA0( c ); \
+                        W[r] = b; \
+
+            EXPAND( A, B, C, D, r );
+            EXPAND( B, A, D, C, (r+1));
+
+            #undef EXPAND
+        }
+
+        A = ha[7];
+        B = ha[6];
+        C = ha[5];
+        D = ha[4];
+
+        for( r=0; r<80; r += 4 )
+        {
+            //
+            // Loop invariant:
+            // A, B, C, and D are the a,b,c,d values of the current state.
+            // W[r] is the next expanded message word to be processed.
+            // W[r-8 .. r-5] contain the current state words h, g, f, e.
+            //
+
+            //
+            // Macro to compute one round
+            // (the new e value is stored to W[r-4], overwriting a message word that is no longer needed)
+            #define DO_ROUND( a, b, c, d, t, r ) \
+                t = W[r] + CSIGMA1( W[r-5] ) + W[r-8] + CH( W[r-5], W[r-6], W[r-7] ) + SymCryptSha512K[r]; \
+                W[r-4] = t + d; \
+                d = t + CSIGMA0( a ) + MAJ( c, b, a );
+
+            DO_ROUND( A, B, C, D, T, r );
+            DO_ROUND( D, A, B, C, T, (r+1) );
+            DO_ROUND( C, D, A, B, T, (r+2) );
+            DO_ROUND( B, C, D, A, T, (r+3) );
+            #undef DO_ROUND
+        }
+
+        buf[3] = ha[7] = buf[3] + A;
+        buf[2] = ha[6] = buf[2] + B;
+        buf[1] = ha[5] = buf[1] + C;
+        buf[0] = ha[4] = buf[0] + D;
+        ha[3] += W[r-5];    // r == 80 here, so W[r-5 .. r-8] hold the final e, f, g, h
+        ha[2] += W[r-6];
+        ha[1] += W[r-7];
+        ha[0] += W[r-8];
+
+        pbData += 128;
+        cbData -= 128;
+    }
+
+    pChain->H[0] = ha[7];
+    pChain->H[1] = ha[6];
+    pChain->H[2] = ha[5];
+    pChain->H[3] = ha[4];
+    pChain->H[4] = ha[3];
+    pChain->H[5] = ha[2];
+    pChain->H[6] = ha[1];
+    pChain->H[7] = ha[0];
+
+    *pcbRemaining = cbData;
+
+    //
+    // Wipe the variables;
+    //
+    SymCryptWipeKnownSize( buf, sizeof( buf ) );
+    SYMCRYPT_FORCE_WRITE64( &A, 0 );
+    SYMCRYPT_FORCE_WRITE64( &B, 0 );
+    SYMCRYPT_FORCE_WRITE64( &C, 0 );
+    SYMCRYPT_FORCE_WRITE64( &D, 0 );
+    SYMCRYPT_FORCE_WRITE64( &T, 0 );
+
+}
+
+//
+// UINT64 based implementation that
+// first computes the expanded message, and then the
+// actual hash computation.
+// This one uses more registers than the previous one.
+// + +VOID +SYMCRYPT_CALL +SymCryptSha512AppendBlocks_ull3( + _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Out_ SIZE_T * pcbRemaining ) +{ + SYMCRYPT_ALIGN UINT64 W[80]; + SYMCRYPT_ALIGN UINT64 ha[8]; + UINT64 A, B, C, D, E, F, G, H; + int r; + + ha[7] = pChain->H[0]; + ha[6] = pChain->H[1]; + ha[5] = pChain->H[2]; + ha[4] = pChain->H[3]; + ha[3] = pChain->H[4]; + ha[2] = pChain->H[5]; + ha[1] = pChain->H[6]; + ha[0] = pChain->H[7]; + + while( cbData >= 128 ) + { + + // + // Capture the input into W[0..15] + // + for( r=0; r<16; r+= 2 ) + { + W[r ] = SYMCRYPT_LOAD_MSBFIRST64( &pbData[ 8* r ] ); + W[r+1] = SYMCRYPT_LOAD_MSBFIRST64( &pbData[ 8*(r+1) ] ); + } + + // + // Expand the message + // + A = W[15]; + B = W[14]; + D = W[0]; + for( r=16; r<80; r+= 2 ) + { + // Loop invariant: A=W[r-1], B = W[r-2], D = W[r-16] + + // + // Macro for one word of message expansion. + // Invariant: + // on entry: a = W[r-1], b = W[r-2], d = W[r-16] + // on exit: W[r] computed, a = W[r-1], b = W[r], c = W[r-15] + // + #define EXPAND( a, b, c, d, r ) \ + c = W[r-15]; \ + b = d + LSIGMA1( b ) + W[r-7] + LSIGMA0( c ); \ + W[r] = b; \ + + EXPAND( A, B, C, D, r ); + EXPAND( B, A, D, C, (r+1)); + + #undef EXPAND + } + + A = ha[7]; + B = ha[6]; + C = ha[5]; + D = ha[4]; + E = ha[3]; + F = ha[2]; + G = ha[1]; + H = ha[0]; + + for( r=0; r<80; r += 8 ) + { + // + // Loop invariant: + // A, B, C, and D, E, F, G, H, are the values of the current state. + // W[r] is the next expanded message word to be processed. 
+ // + + // + // Macro to compute one round + // + #define DO_ROUND( a, b, c, d, e, f, g, h, r ) \ + h += W[r] + CSIGMA1( e ) + CH( e, f, g ) + SymCryptSha512K[r]; \ + d += h; \ + h += CSIGMA0( a ) + MAJ( c, b, a ); + + DO_ROUND( A, B, C, D, E, F, G, H, (r ) ); + DO_ROUND( H, A, B, C, D, E, F, G, (r+1) ); + DO_ROUND( G, H, A, B, C, D, E, F, (r+2) ); + DO_ROUND( F, G, H, A, B, C, D, E, (r+3) ); + DO_ROUND( E, F, G, H, A, B, C, D, (r+4) ); + DO_ROUND( D, E, F, G, H, A, B, C, (r+5) ); + DO_ROUND( C, D, E, F, G, H, A, B, (r+6) ); + DO_ROUND( B, C, D, E, F, G, H, A, (r+7) ); + #undef DO_ROUND + } + + ha[7] += A; + ha[6] += B; + ha[5] += C; + ha[4] += D; + ha[3] += E; + ha[2] += F; + ha[1] += G; + ha[0] += H; + + pbData += 128; + cbData -= 128; + } + + pChain->H[0] = ha[7]; + pChain->H[1] = ha[6]; + pChain->H[2] = ha[5]; + pChain->H[3] = ha[4]; + pChain->H[4] = ha[3]; + pChain->H[5] = ha[2]; + pChain->H[6] = ha[1]; + pChain->H[7] = ha[0]; + + *pcbRemaining = cbData; + + // + // Wipe the variables; + // + SymCryptWipeKnownSize( W, sizeof( W ) ); + SymCryptWipeKnownSize( ha, sizeof( ha ) ); + SYMCRYPT_FORCE_WRITE64( &A, 0 ); + SYMCRYPT_FORCE_WRITE64( &B, 0 ); + SYMCRYPT_FORCE_WRITE64( &C, 0 ); + SYMCRYPT_FORCE_WRITE64( &D, 0 ); + SYMCRYPT_FORCE_WRITE64( &E, 0 ); + SYMCRYPT_FORCE_WRITE64( &F, 0 ); + SYMCRYPT_FORCE_WRITE64( &G, 0 ); + SYMCRYPT_FORCE_WRITE64( &H, 0 ); +} + +#undef MAJ +#undef CH +#undef CSIGMA0 +#undef CSIGMA1 +#undef LSIGMA0 +#undef LSIGMA1 +#undef CROUND +#undef IROUND +#undef FROUND + +//====================================================================================== +// Implementation using Xmm registers +// +#if SYMCRYPT_CPU_X86 // only on X86; AMD64 is faster when using UINT64s + +#ifdef __clang__ +#pragma clang attribute push (__attribute__((target("ssse3"))), apply_to=function) +#else +#pragma GCC push_options +#pragma GCC target("ssse3") +#endif + +#if SYMCRYPT_MS_VC +#ifndef _mm_storeu_si64 + // Workaround missing intrinsic on some versions of 
// MSVC
    #define _mm_storeu_si64(p, a) (_mm_storel_epi64((__m128i*)(p), (a)))
#endif
#endif

//
// Short aliases for the 128-bit integer intrinsics used by the Xmm implementation.
// The add and shift wrappers work on each 64-bit lane independently.
// XMMROR xors a left shift by (64 - n) with a right shift by n, which yields a
// rotate-right by n bits of each 64-bit lane.
// XMMSTORE_UINT64 writes the low 64 bits of _a to _addr.
//
#define XMMADD( _a, _b ) _mm_add_epi64((_a), (_b))
#define XMMAND( _a, _b ) _mm_and_si128((_a), (_b))
#define XMMOR( _a, _b ) _mm_or_si128((_a), (_b))
#define XMMROR( _a, _n ) _mm_xor_si128( _mm_slli_epi64( (_a), 64-(_n)), _mm_srli_epi64( (_a), (_n)) )
#define XMMSHR( _a, _n ) _mm_srli_epi64((_a), (_n))
#define XMMXOR( _a, _b ) _mm_xor_si128((_a), (_b))
#define XMMSTORE_UINT64( _a, _addr ) _mm_storeu_si64((_addr), (_a))

//
// SHA-512 round functions built from the aliases above:
//   XMMMAJ      bitwise majority of x, y, z:  ((z | y) & x) | (z & y)
//   XMMCH       bitwise choice: takes y where x is set and z where x is clear
//   XMMCSIGMA0  rotate-right by 28, 34 and 39, xored together
//   XMMCSIGMA1  rotate-right by 14, 18 and 41, xored together
//   XMMLSIGMA0  rotate-right by 1 and 8, shift-right by 7, xored together
//   XMMLSIGMA1  rotate-right by 19 and 61, shift-right by 6, xored together
// Every macro evaluates its argument more than once, so arguments must be free of side effects.
//
#define XMMMAJ( x, y, z ) XMMOR( XMMAND( XMMOR( (z), (y)), (x)), XMMAND( (z), (y) ) )
#define XMMCH( x, y, z ) XMMXOR( XMMAND( XMMXOR( (z), (y) ), (x)), (z))
#define XMMCSIGMA0( x ) XMMXOR( XMMXOR( XMMROR((x), 28), XMMROR((x), 34)), XMMROR((x), 39))
#define XMMCSIGMA1( x ) XMMXOR( XMMXOR( XMMROR((x), 14), XMMROR((x), 18)), XMMROR((x), 41))
#define XMMLSIGMA0( x ) XMMXOR( XMMXOR( XMMROR((x), 1), XMMROR((x), 8)), XMMSHR((x), 7))
#define XMMLSIGMA1( x ) XMMXOR( XMMXOR( XMMROR((x), 19), XMMROR((x), 61)), XMMSHR((x), 6))

//
// Core round takes two arguments: r16 = round number modulo 16, r = round number - r16.
// On entry, Wt must be equal to the sum of the round constant and the expanded message word for this round.
// Only the lower word of each Xmm register is used.
//
// The eight state words live in ah[0..7] and are addressed relative to r16 (mod 8)
// instead of being moved between variables:
//   ah[(r16+7)&7] = a, ah[(r16+6)&7] = b, ah[(r16+5)&7] = c, ah[(r16+4)&7] = d,
//   ah[(r16+3)&7] = e, ah[(r16+2)&7] = f, ah[(r16+1)&7] = g, ah[r16&7]     = h.
// The macro body does not reference its second argument (r).
// It expects ah[] and Wt to be in scope at the expansion site.
//
#define XMMCROUND( r16, r ) {;\
    ah[r16 & 7] = XMMADD( XMMADD( XMMADD( ah[r16 & 7], XMMCSIGMA1(ah[(r16+3)&7]) ), XMMCH(ah[(r16+3)&7], ah[(r16+2)&7], ah[(r16+1)&7]) ), Wt );\
    ah[(r16+4)&7] = XMMADD( ah[(r16+4)&7], ah[r16 &7] );\
    ah[r16 & 7] = XMMADD( XMMADD( ah[r16 & 7], XMMCSIGMA0(ah[(r16+7)&7])), XMMMAJ(ah[(r16+7)&7], ah[(r16+6)&7], ah[(r16+5)&7]) );\
}

#pragma warning( disable: 4127 ) // conditional expression is constant

//
// Initial round; reads data and performs a round.
// Data is read in 128-bit chunks every other round.
//
// Even r: loads 16 message bytes (two 64-bit words) from pbData[8*r], byte-reverses each
//         64-bit half, stores the pair in W[r/2], adds the pair of round constants starting at
//         SymCryptSha512K[r], and parks the upper 64 bits of the sum in Ws for the next round.
// Odd r:  takes Wt from Ws (the upper half computed by the preceding even round).
// Expects pbData, W, Wt, Ws, ah and BYTE_REVERSE_64 to be in scope at the expansion site.
// The round-constant load is an aligned 128-bit load; r is even on that path.
//
#define XMMIROUND( r ) {\
    if( (r&1) == 0 ) \
    { \
        Wt = _mm_loadu_si128( (__m128i *)&pbData[ 8*r ] ); \
        Wt = _mm_shuffle_epi8( Wt, BYTE_REVERSE_64 ); \
        W[r/2] = Wt; \
        Wt = XMMADD( Wt, _mm_load_si128( (__m128i *)&SymCryptSha512K[r] ) ); \
        Ws = _mm_srli_si128( Wt, 8 ); \
    } else {\
        Wt = Ws;\
    }\
    XMMCROUND( r, r );\
}

//
// Working version of XMMIROUND:
//      Wt = XMMFROM_MSBF( &pbData[ 8*r ] );
//      W[r] = Wt;
//      Wt = XMMADD( XMMFROM_UINT64(SymCryptSha512K[r]), Wt );
//      XMMCROUND(r,r);

//
// Rounds 16..79.  r16 = round number modulo 16, rb = round number - r16.
// Even r16: computes two new message words at once (one per 64-bit lane) from the 16-word
//           window kept in W[0..7] as pairs, stores the pair back in W[r16/2], adds the pair of
//           round constants starting at SymCryptSha512K[r16 + rb], and parks the upper sum in Ws.
//           _mm_alignr_epi8( hi, lo, 8 ) is used to build a pair that straddles two W elements.
// Odd r16:  takes Wt from Ws.
// Expects W, Wt, Ws and ah to be in scope at the expansion site.
//
#define XMMFROUND(r16, rb) { \
    if( (r16 & 1) == 0 ) \
    {\
        Wt = XMMADD( XMMADD( XMMADD( XMMLSIGMA1( W[((r16 - 2)&15)/2] ), \
                    _mm_alignr_epi8( W[((r16 - 6)&15)/2], W[((r16 - 7)&15)/2], 8 ) ), \
                    XMMLSIGMA0( _mm_alignr_epi8( W[((r16 - 14)&15)/2], W[((r16 - 15)&15)/2], 8 ) ) ), \
                    W[((r16 - 16)&15)/2] ); \
        W[r16/2] = Wt;\
        Ws = _mm_load_si128( (__m128i *)&SymCryptSha512K[r16 + rb] );\
        Wt = XMMADD( Ws , Wt );\
        Ws = _mm_srli_si128( Wt, 8 );\
    } else {\
        Wt = Ws;\
    }\
    XMMCROUND( r16, r16+rb ); \
}

//
// SymCryptSha512AppendBlocks_xmm
//
// Compression function using Xmm registers (this section is compiled only when SYMCRYPT_CPU_X86
// is set, with the ssse3 target enabled).
//
//  pChain        chaining state; H[0..7] are read on entry and overwritten on exit.
//  pbData        input bytes; consumed 128 bytes at a time.
//  cbData        number of input bytes available.
//  pcbRemaining  receives the number of bytes left over once no full 128-byte block remains.
//
// The state words are kept in the low 64-bit lane of ah[0..7] in reverse order (ah[7] = H[0]).
// feedf[] holds the state at the start of the current block for the feed-forward addition.
// All local buffers are wiped before returning.
//
VOID
SYMCRYPT_CALL
SymCryptSha512AppendBlocks_xmm(
    _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain,
    _In_reads_(cbData) PCBYTE pbData,
    SIZE_T cbData,
    _Out_ SIZE_T * pcbRemaining )
{
    SYMCRYPT_ALIGN __m128i W[8]; // message expansion buffer, 8 elements each storing 2 consecutive UINT64s
    SYMCRYPT_ALIGN __m128i ah[8];
    SYMCRYPT_ALIGN __m128i feedf[8];
    int round;
    __m128i Wt, Ws;
    // Shuffle mask that reverses the byte order inside each 64-bit half of a register.
    const __m128i BYTE_REVERSE_64 = _mm_set_epi8( 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 );

    // Each 128-bit load fetches two state words; the byte shift moves the second one to the low lane.
    Wt = _mm_loadu_si128( (__m128i *) &pChain->H[0] );
    feedf[7] = ah[7] = Wt;
    feedf[6] = ah[6] = _mm_srli_si128( Wt, 8 );
    Wt = _mm_loadu_si128( (__m128i *) &pChain->H[2] );
    feedf[5] = ah[5] = Wt;
    feedf[4] = ah[4] = _mm_srli_si128( Wt, 8 );
    Wt = _mm_loadu_si128( (__m128i *) &pChain->H[4] );
    feedf[3] = ah[3] = Wt;
    feedf[2] = ah[2] = _mm_srli_si128( Wt, 8 );
    Wt = _mm_loadu_si128( (__m128i *) &pChain->H[6] );
    feedf[1] = ah[1] = Wt;
    feedf[0] = ah[0] = _mm_srli_si128( Wt, 8 );

    while( cbData >= 128 )
    {
        //
        // initial rounds 1 to 16
        //

        XMMIROUND( 0 );
        XMMIROUND( 1 );
        XMMIROUND( 2 );
        XMMIROUND( 3 );
        XMMIROUND( 4 );
        XMMIROUND( 5 );
        XMMIROUND( 6 );
        XMMIROUND( 7 );
        XMMIROUND( 8 );
        XMMIROUND( 9 );
        XMMIROUND( 10 );
        XMMIROUND( 11 );
        XMMIROUND( 12 );
        XMMIROUND( 13 );
        XMMIROUND( 14 );
        XMMIROUND( 15 );

        // Remaining 64 rounds, 16 per iteration so that r16 is a compile-time constant.
        for( round=16; round<80; round += 16 )
        {
            XMMFROUND( 0, round );
            XMMFROUND( 1, round );
            XMMFROUND( 2, round );
            XMMFROUND( 3, round );
            XMMFROUND( 4, round );
            XMMFROUND( 5, round );
            XMMFROUND( 6, round );
            XMMFROUND( 7, round );
            XMMFROUND( 8, round );
            XMMFROUND( 9, round );
            XMMFROUND( 10, round );
            XMMFROUND( 11, round );
            XMMFROUND( 12, round );
            XMMFROUND( 13, round );
            XMMFROUND( 14, round );
            XMMFROUND( 15, round );
        }

        // Feed-forward: add the state from the start of the block and remember the sum for the next block.
        feedf[0] = ah[0] = XMMADD( ah[0], feedf[0] );
        feedf[1] = ah[1] = XMMADD( ah[1], feedf[1] );
        feedf[2] = ah[2] = XMMADD( ah[2], feedf[2] );
        feedf[3] = ah[3] = XMMADD( ah[3], feedf[3] );
        feedf[4] = ah[4] = XMMADD( ah[4], feedf[4] );
        feedf[5] = ah[5] = XMMADD( ah[5], feedf[5] );
        feedf[6] = ah[6] = XMMADD( ah[6], feedf[6] );
        feedf[7] = ah[7] = XMMADD( ah[7], feedf[7] );

        pbData += 128;
        cbData -= 128;

    }

    // Write the low lane of each state register back, undoing the reversed ordering.
    XMMSTORE_UINT64( ah[7], &(pChain->H[0]) );
    XMMSTORE_UINT64( ah[6], &(pChain->H[1]) );
    XMMSTORE_UINT64( ah[5], &(pChain->H[2]) );
    XMMSTORE_UINT64( ah[4], &(pChain->H[3]) );
    XMMSTORE_UINT64( ah[3], &(pChain->H[4]) );
    XMMSTORE_UINT64( ah[2], &(pChain->H[5]) );
    XMMSTORE_UINT64( ah[1], &(pChain->H[6]) );
    XMMSTORE_UINT64( ah[0], &(pChain->H[7]) );

    *pcbRemaining = cbData;

    //
    // Wipe the variables;
    //
    SymCryptWipeKnownSize( ah, sizeof( ah ) );
    SymCryptWipeKnownSize( feedf, sizeof( feedf ) );
    SymCryptWipeKnownSize( W, sizeof( W ) );
    SymCryptWipeKnownSize( &Wt, sizeof( Wt ));
    SymCryptWipeKnownSize( &Ws, sizeof( Ws ));
}

#ifdef __clang__
#pragma clang attribute pop
#else
#pragma GCC pop_options
#endif

#endif



//======================================================================================
// Implementation using NEON registers
//
#if SYMCRYPT_CPU_ARM


//
// 64-bit NEON helpers.  ROR ors a left shift by (64 - n) with a right shift by n, giving a
// rotate-right by n.  Note that ROR does not parenthesize its arguments in the expansion.
//
#define ROR( _a, _n ) vorr_u64( vshl_n_u64( _a, 64 - _n ), vshr_n_u64( _a, _n ) )
#define ADD( x, y ) vadd_u64( (x), (y) )

//
// SHA-512 round functions: bitwise majority, bitwise choice (y where x is set, z otherwise),
// and the four sigma functions with the same rotate/shift amounts as the Xmm versions.
//
#define MAJ( x, y, z ) vorr_u64( vand_u64( vorr_u64( (z), (y)), (x)), vand_u64( (z), (y) ) )
#define CH( x, y, z ) veor_u64( vand_u64( veor_u64( (z), (y) ), (x)), (z))
#define CSIGMA0( x ) veor_u64( veor_u64( ROR((x), 28), ROR((x), 34)), ROR((x), 39))
#define CSIGMA1( x ) veor_u64( veor_u64( ROR((x), 14), ROR((x), 18)), ROR((x), 41))
#define LSIGMA0( x ) veor_u64( veor_u64( ROR((x), 1), ROR((x), 8)), vshr_n_u64((x), 7))
#define LSIGMA1( x ) veor_u64( veor_u64( ROR((x), 19), ROR((x), 61)), vshr_n_u64((x), 6))

//
// r = round number, r16 = r mod 16 (often a compile-time constant when r is not)
//
// Stores the current message word Wt into W[r16], then updates h and d for one round.
// Expects W and Wt to be in scope; the round constant is read directly from SymCryptSha512K[r].
//
#define CROUND( a, b, c, d, e, f, g, h, r, r16 ) {\
    W[r16] = Wt; \
    h = ADD( h, ADD( ADD( ADD( CSIGMA1(e), CH(e, f, g)), *(__n64 *)&SymCryptSha512K[r]), Wt ));\
    d = ADD( d, h );\
    h = ADD( h, ADD( CSIGMA0(a), MAJ(a, b, c)));\
}

//
// Initial round that reads the message.
// r is the round number 0..15
//
#define IROUND( a, b, c, d, e, f, g, h, r ) {\
    Wt = vmov_n_u64( SYMCRYPT_LOAD_MSBFIRST64( &pbData[ 8*r ] ) );\
    CROUND( a, b, c, d, e, f, g, h, r, r);\
    }
//
// Subsequent rounds.
// r is the round number, r16 is the round number mod 16.
// These are separate as typically r is run-time and r16 is compile time constant.
//
// Computes the next message word from the 16-word circular buffer W (the W[r16 & 15] term is
// the word from 16 rounds earlier, which CROUND then overwrites) and performs the round.
//
#define FROUND( a, b, c, d, e, f, g, h, r, r16 ) { \
    Wt = ADD( ADD( LSIGMA1( W[(r16-2) & 15] ), LSIGMA0( W[(r16-15) & 15])) , ADD( W[(r16-7) & 15], W[r16 & 15])); \
    CROUND( a, b, c, d, e, f, g, h, r, r16 ); \
    }

//
// This is the core routine that does the actual hard work
// This is based on the older one in RSA32LIB by Scott Field from 2001
//
//  pChain        chaining state; accessed in place through pH on every block.
//  pbData        input bytes; consumed 128 bytes at a time.
//  cbData        number of input bytes available.
//  pcbRemaining  receives the number of bytes left over once no full 128-byte block remains.
//
// pH aliases pChain->H, so the feed-forward at the end of each block both reads the previous
// chaining value from, and writes the new value to, the caller's state.
//
VOID
SYMCRYPT_CALL
SymCryptSha512AppendBlocks_neon(
    _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain,
    _In_reads_(cbData) PCBYTE pbData,
    SIZE_T cbData,
    _Out_ SIZE_T * pcbRemaining )
{
    SYMCRYPT_ALIGN __n64 W[16];     // circular buffer of the 16 most recent message words
    __n64 A, B, C, D, E, F, G, H;
    int round;
    __n64 Wt;
    __n64 * pH = (__n64 *) &pChain->H[0];

    A = pH[0];
    B = pH[1];
    C = pH[2];
    D = pH[3];
    E = pH[4];
    F = pH[5];
    G = pH[6];
    H = pH[7];

    while( cbData >= 128 )
    {
        //
        // initial rounds 1 to 16
        //
        // The variable-to-role assignment is rotated by one position per round instead of
        // moving values between variables; after 8 rounds it is back at the start.
        //

        IROUND( A, B, C, D, E, F, G, H, 0 );
        IROUND( H, A, B, C, D, E, F, G, 1 );
        IROUND( G, H, A, B, C, D, E, F, 2 );
        IROUND( F, G, H, A, B, C, D, E, 3 );
        IROUND( E, F, G, H, A, B, C, D, 4 );
        IROUND( D, E, F, G, H, A, B, C, 5 );
        IROUND( C, D, E, F, G, H, A, B, 6 );
        IROUND( B, C, D, E, F, G, H, A, 7 );
        IROUND( A, B, C, D, E, F, G, H, 8 );
        IROUND( H, A, B, C, D, E, F, G, 9 );
        IROUND( G, H, A, B, C, D, E, F, 10 );
        IROUND( F, G, H, A, B, C, D, E, 11 );
        IROUND( E, F, G, H, A, B, C, D, 12 );
        IROUND( D, E, F, G, H, A, B, C, 13 );
        IROUND( C, D, E, F, G, H, A, B, 14 );
        IROUND( B, C, D, E, F, G, H, A, 15 );

        for( round=16; round<80; round += 16 )
        {
            FROUND( A, B, C, D, E, F, G, H, round + 0, 0 );
            FROUND( H, A, B, C, D, E, F, G, round + 1, 1 );
            FROUND( G, H, A, B, C, D, E, F, round + 2, 2 );
            FROUND( F, G, H, A, B, C, D, E, round + 3, 3 );
            FROUND( E, F, G, H, A, B, C, D, round + 4, 4 );
            FROUND( D, E, F, G, H, A, B, C, round + 5, 5 );
            FROUND( C, D, E, F, G, H, A, B, round + 6, 6 );
            FROUND( B, C, D, E, F, G, H, A, round + 7, 7 );
            FROUND( A, B, C, D, E, F, G, H, round + 8, 8 );
            FROUND( H, A, B, C, D, E, F, G, round + 9, 9 );
            FROUND( G, H, A, B, C, D, E, F, round + 10, 10 );
            FROUND( F, G, H, A, B, C, D, E, round + 11, 11 );
            FROUND( E, F, G, H, A, B, C, D, round + 12, 12 );
            FROUND( D, E, F, G, H, A, B, C, round + 13, 13 );
            FROUND( C, D, E, F, G, H, A, B, round + 14, 14 );
            FROUND( B, C, D, E, F, G, H, A, round + 15, 15 );
        }

        // Feed-forward directly into the caller's chaining state.
        pH[0] = A = ADD( A, pH[0] );
        pH[1] = B = ADD( B, pH[1] );
        pH[2] = C = ADD( C, pH[2] );
        pH[3] = D = ADD( D, pH[3] );
        pH[4] = E = ADD( E, pH[4] );
        pH[5] = F = ADD( F, pH[5] );
        pH[6] = G = ADD( G, pH[6] );
        pH[7] = H = ADD( H, pH[7] );

        pbData += 128;
        cbData -= 128;
    }

    *pcbRemaining = cbData;

    //
    // Wipe the variables;
    //
    SymCryptWipeKnownSize( W, sizeof( W ) );
    SymCryptWipeKnownSize( &A, sizeof( A ) );
    SymCryptWipeKnownSize( &B, sizeof( B ) );
    SymCryptWipeKnownSize( &C, sizeof( C ) );
    SymCryptWipeKnownSize( &D, sizeof( D ) );
    SymCryptWipeKnownSize( &E, sizeof( E ) );
    SymCryptWipeKnownSize( &F, sizeof( F ) );
    SymCryptWipeKnownSize( &G, sizeof( G ) );
    SymCryptWipeKnownSize( &H, sizeof( H ) );
    SymCryptWipeKnownSize( &Wt, sizeof( Wt ) );
}

#endif

//======================================================================================
//
// Switch between different implementations of compression function
//
// Selection, per target:
//   AMD64  always the UINT64 implementation (_ull); the Ymm paths are commented out below.
//   ARM    _neon when the NEON CPU feature is present, otherwise _ull.
//   X86    _xmm when SSSE3 is present and SymCryptSaveXmm succeeds (the Xmm state is restored
//          afterwards), otherwise _ull.
//   other  _ull.
// All arguments are forwarded unchanged to the selected implementation.
//
//FORCEINLINE
VOID
SYMCRYPT_CALL
SymCryptSha512AppendBlocks(
    _Inout_ SYMCRYPT_SHA512_CHAINING_STATE * pChain,
    _In_reads_( cbData ) PCBYTE pbData,
    SIZE_T cbData,
    _Out_ SIZE_T * pcbRemaining )
{
#if SYMCRYPT_CPU_AMD64

    // Temporarily disabling use of Ymm in SHA2
    // SYMCRYPT_EXTENDED_SAVE_DATA SaveData;

    // if (SYMCRYPT_CPU_FEATURES_PRESENT(SYMCRYPT_CPU_FEATURE_AVX512 | SYMCRYPT_CPU_FEATURE_BMI2) &&
    //     SymCryptSaveYmm(&SaveData) == SYMCRYPT_NO_ERROR)
    // {
    //     SymCryptSha512AppendBlocks_ymm_avx512vl_asm(pChain, pbData, cbData, pcbRemaining);

    //     SymCryptRestoreYmm(&SaveData);
    // }
    // else if (SYMCRYPT_CPU_FEATURES_PRESENT(SYMCRYPT_CPU_FEATURE_AVX2 | SYMCRYPT_CPU_FEATURE_BMI2) &&
    //     SymCryptSaveYmm(&SaveData) == SYMCRYPT_NO_ERROR)
    // {
    //     //SymCryptSha512AppendBlocks_ymm_1block(pChain, pbData, cbData, pcbRemaining);
    //     //SymCryptSha512AppendBlocks_ymm_2blocks(pChain, pbData, cbData, pcbRemaining);
    //     //SymCryptSha512AppendBlocks_ymm_4blocks(pChain, pbData, cbData, pcbRemaining);
    //     SymCryptSha512AppendBlocks_ymm_avx2_asm(pChain, pbData, cbData, pcbRemaining);

    //     SymCryptRestoreYmm(&SaveData);
    // }
    // else
    {
        SymCryptSha512AppendBlocks_ull( pChain, pbData, cbData, pcbRemaining );
        //SymCryptSha512AppendBlocks_ull2( pChain, pbData, cbData, pcbRemaining );
        //SymCryptSha512AppendBlocks_ull3( pChain, pbData, cbData, pcbRemaining );
    }


#elif SYMCRYPT_CPU_ARM

    if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON ) )
    {
        SymCryptSha512AppendBlocks_neon( pChain, pbData, cbData, pcbRemaining ); // Tegra T3: 48 c/B
    } else {
        SymCryptSha512AppendBlocks_ull( pChain, pbData, cbData, pcbRemaining ); // Tegra T3: 65.34 c/B
        //SymCryptSha512AppendBlocks_ull2( pChain, pbData, cbData, pcbRemaining ); // Tegra T3: 77.4 c/B
        //SymCryptSha512AppendBlocks_ull3( pChain, pbData, cbData, pcbRemaining ); // Tegra T3: 71.6 c/B
    }

#elif SYMCRYPT_CPU_X86

    SYMCRYPT_EXTENDED_SAVE_DATA SaveData;

    // The save call is only attempted when SSSE3 is reported; if it fails, fall back to _ull.
    if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSSE3 ) && SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR )
    {
        SymCryptSha512AppendBlocks_xmm( pChain, pbData, cbData, pcbRemaining );
        SymCryptRestoreXmm( &SaveData );
    } else {
        SymCryptSha512AppendBlocks_ull( pChain, pbData, cbData, pcbRemaining ); // core2: 36.40 c/B
        //SymCryptSha512AppendBlocks_ull2( pChain, pbData, cbData, pcbRemaining ); // core2: 49.09 c/B
        //SymCryptSha512AppendBlocks_ull3( pChain, pbData, cbData, pcbRemaining ); // core2: 38.29 c/B
    }

#else

    SymCryptSha512AppendBlocks_ull( pChain, pbData, cbData, pcbRemaining ); // need tuning...

#endif
}
diff --git a/libs/symcrypt/lib/sha512Par-ymm.c b/libs/symcrypt/lib/sha512Par-ymm.c
new file mode 100644
index 00000000000..c720569c929
--- /dev/null
+++ b/libs/symcrypt/lib/sha512Par-ymm.c
@@ -0,0 +1,243 @@
//
// Sha512Par-ymm.c
//
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
//
// All YMM code for SHA-512/SHA-384 Parallel operations
// Requires compiler support for avx2
//

#include "precomp.h"

extern SYMCRYPT_ALIGN_AT( 64 ) const UINT64 SymCryptSha512K[81];


#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64

#ifdef __clang__
#pragma clang attribute push (__attribute__((target("avx2"))), apply_to=function)
#else
#pragma GCC push_options
#pragma GCC target("avx2")
#endif

//
// Code that uses the YMM registers.
//

//
// ugly hack, there is no generic way to broadcast a 64-bit value between x86 & amd64
//
// Both variants produce a register whose four 64-bit lanes all hold the UINT64 at *_p.
// The x86 variant assembles it from the two 32-bit halves.
//
#if SYMCRYPT_CPU_X86
#define M4x64broadcast_load(_p) _mm256_set_epi32( ((UINT32 *)(_p))[1], ((UINT32 *)(_p))[0], ((UINT32 *)(_p))[1], ((UINT32 *)(_p))[0], ((UINT32 *)(_p))[1], ((UINT32 *)(_p))[0], ((UINT32 *)(_p))[1], ((UINT32 *)(_p))[0] )
#elif SYMCRYPT_CPU_AMD64
#define M4x64broadcast_load(_p) _mm256_set1_epi64x( *(_p) )
#endif

//
// Bitwise majority and bitwise choice (y where x is set, z otherwise) on 256-bit registers.
// The arguments are not parenthesized in the expansions.
//
#define MAJYMM( x, y, z ) _mm256_or_si256( _mm256_and_si256( _mm256_or_si256( z, y ), x ), _mm256_and_si256( z, y ))
#define CHYMM( x, y, z ) _mm256_xor_si256( _mm256_and_si256( _mm256_xor_si256( z, y ), x ), z )

//
// Sigma functions.  Each rotate-right by n of a 64-bit lane is written as the xor of a left
// shift by (64 - n) and a right shift by n:
//   CSIGMA0YMM  rotations by 28, 34, 39
//   CSIGMA1YMM  rotations by 14, 18, 41
//   LSIGMA0YMM  rotations by 1 and 8, plus a plain right shift by 7
//   LSIGMA1YMM  rotations by 19 and 61, plus a plain right shift by 6
//
#define CSIGMA0YMM( x ) \
    _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \
        _mm256_slli_epi64(x,36) , _mm256_srli_epi64(x, 28) ),\
        _mm256_slli_epi64(x,30) ), _mm256_srli_epi64(x, 34) ),\
        _mm256_slli_epi64(x,25) ), _mm256_srli_epi64(x, 39) )
#define CSIGMA1YMM( x ) \
    _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \
        _mm256_slli_epi64(x,50) , _mm256_srli_epi64(x, 14) ),\
        _mm256_slli_epi64(x,46) ), _mm256_srli_epi64(x, 18) ),\
        _mm256_slli_epi64(x,23) ), _mm256_srli_epi64(x, 41) )
#define LSIGMA0YMM( x ) \
    _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \
        _mm256_slli_epi64(x,63) , _mm256_srli_epi64(x, 1) ),\
        _mm256_slli_epi64(x,56) ), _mm256_srli_epi64(x, 8) ),\
        _mm256_srli_epi64(x, 7) )
#define LSIGMA1YMM( x ) \
    _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( _mm256_xor_si256( \
        _mm256_slli_epi64(x,45) , _mm256_srli_epi64(x, 19) ),\
        _mm256_slli_epi64(x, 3) ), _mm256_srli_epi64(x, 61) ),\
        _mm256_srli_epi64(x,6) )

//
// 4x4 transpose of 64-bit elements (Sij = element j of source register i):
//
// S0: 00 01 02 03
// S1: 10 11 12 13
// S2: 20 21 22 23
// S3: 30 31 32 33
//
// T0: 00 10 02 12      unpacklo_epi64( S0, S1 )        note: unpacklo in AVX works in parallel on 2 128-bit values
// T1: 01 11 03 13      unpackhi_epi64( S0, S1 )
// T2: 20 30 22 32
// T3: 21 31 23 33
//
// R0: 00 10 20 30
// R1: 01 11 21 31
// R2: 02 12 22 32
// R3: 03 13 23 33


#define YMM_TRANSPOSE_64( _R0, _R1, _R2, _R3, _S0, _S1, _S2, _S3 ) \
    {\
        __m256i _T0, _T1, _T2, _T3;\
        _T0 = _mm256_unpacklo_epi64( _S0, _S1 ); _T1 = _mm256_unpackhi_epi64( _S0, _S1 );\
        _T2 = _mm256_unpacklo_epi64( _S2, _S3 ); _T3 = _mm256_unpackhi_epi64( _S2, _S3 );\
        \
        _R0 = _mm256_permute2x128_si256( _T0, _T2, 0x20 ); _R1 = _mm256_permute2x128_si256( _T1, _T3, 0x20);\
        _R2 = _mm256_permute2x128_si256( _T0, _T2, 0x31 ); _R3 = _mm256_permute2x128_si256( _T1, _T3, 0x31);\
    }

//
// SymCryptParallelSha512AppendBlocks_ymm
//
// Runs the SHA-512 compression function on four independent inputs at once, one per 64-bit lane.
//
//  pChain    four chaining states; read on entry and overwritten on exit.
//  ppByte    four input pointers; each is advanced by 128 bytes per processed block.
//  nBytes    number of bytes available in each input; whole 128-byte blocks are processed.
//  pScratch  work area, accessed as an array of __m256i.
//            NOTE(review): the plain __m256i stores below presumably require pScratch to be
//            32-byte aligned - confirm against the caller.
//
// Scratch layout (in __m256i units):
//   buf[0..3]    copy of the a..d state words at the start of the block (for the feed-forward)
//   buf[4..11]   ha[0..7]: state words in order h, g, ..., b, a
//   buf[12..]    W[0..79]: expanded message words
// The round loop writes W[r-4], so during rounds 0..3 it overwrites ha[4..7]; that is why those
// four words are also kept in buf[0..3].
//
VOID
SYMCRYPT_CALL
SymCryptParallelSha512AppendBlocks_ymm(
    _Inout_updates_( 4 ) PSYMCRYPT_SHA512_CHAINING_STATE * pChain,
    _Inout_updates_( 4 ) PCBYTE * ppByte,
    SIZE_T nBytes,
    _Out_writes_( PAR_SCRATCH_ELEMENTS_512 * 32 ) PBYTE pScratch )
{
    __m256i * buf = (__m256i *)pScratch; // chaining state concatenated with the expanded input block
    __m256i * W = &buf[4 + 8]; // W holds the 80 expanded message words (W[0..79] are written below)
    __m256i * ha = &buf[4]; // initial state words, in order h, g, ..., b, a
    __m256i A, B, C, D, T;
    __m256i T0, T1, T2, T3;
    int r;
    __m256i BYTE_REVERSE_64;

    _mm256_zeroupper();
    // Shuffle mask that reverses the byte order inside every 64-bit element.
    BYTE_REVERSE_64 = _mm256_set_epi8( 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 );

    //
    // The chaining state can be unaligned on x86, so we use unaligned loads
    //

    T0 = _mm256_loadu_si256( (__m256i *)&pChain[0]->H[0] );
    T1 = _mm256_loadu_si256( (__m256i *)&pChain[1]->H[0] );
    T2 = _mm256_loadu_si256( (__m256i *)&pChain[2]->H[0] );
    T3 = _mm256_loadu_si256( (__m256i *)&pChain[3]->H[0] );

    // After the transpose, each ha[] element holds the same state word of all four hashes.
    YMM_TRANSPOSE_64( ha[7], ha[6], ha[5], ha[4], T0, T1, T2, T3 );

    T0 = _mm256_loadu_si256( (__m256i *)&pChain[0]->H[4] );
    T1 = _mm256_loadu_si256( (__m256i *)&pChain[1]->H[4] );
    T2 = _mm256_loadu_si256( (__m256i *)&pChain[2]->H[4] );
    T3 = _mm256_loadu_si256( (__m256i *)&pChain[3]->H[4] );

    YMM_TRANSPOSE_64( ha[3], ha[2], ha[1], ha[0], T0, T1, T2, T3 );

    buf[0] = ha[4];
    buf[1] = ha[5];
    buf[2] = ha[6];
    buf[3] = ha[7];

    while( nBytes >= 128 )
    {

        //
        // Capture the input into W[0..15]
        //
        for( r=0; r<16; r += 4 )
        {
            T0 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[0] ), BYTE_REVERSE_64 ); ppByte[0] += 32;
            T1 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[1] ), BYTE_REVERSE_64 ); ppByte[1] += 32;
            T2 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[2] ), BYTE_REVERSE_64 ); ppByte[2] += 32;
            T3 = _mm256_shuffle_epi8( _mm256_loadu_si256( (__m256i *) ppByte[3] ), BYTE_REVERSE_64 ); ppByte[3] += 32;

            YMM_TRANSPOSE_64( W[r], W[r+1], W[r+2], W[r+3], T0, T1, T2, T3 );
        }

        //
        // Expand the message
        //
        A = W[15];
        B = W[14];
        D = W[0];
        for( r=16; r<80; r+= 2 )
        {
            // Loop invariant: A=W[r-1], B = W[r-2], D = W[r-16]

            //
            // Macro for one word of message expansion.
            // Invariant:
            //      on entry: a = W[r-1], b = W[r-2], d = W[r-16]
            //      on exit:  W[r] computed, a = W[r-1], b = W[r], c = W[r-15]
            //
            #define EXPAND( a, b, c, d, r ) \
                        c = W[r-15]; \
                        b = _mm256_add_epi64( _mm256_add_epi64( _mm256_add_epi64( d, LSIGMA1YMM( b ) ), W[r-7] ), LSIGMA0YMM( c ) ); \
                        W[r] = b; \

            EXPAND( A, B, C, D, r );
            EXPAND( B, A, D, C, (r+1));

            #undef EXPAND
        }

        A = ha[7];
        B = ha[6];
        C = ha[5];
        D = ha[4];

        for( r=0; r<80; r += 4 )
        {
            //
            // Loop invariant:
            // A, B, C, and D are the a,b,c,d values of the current state.
            // W[r] is the next expanded message word to be processed.
            // W[r-8 .. r-5] contain the current state words h, g, f, e.
            //

            //
            // Macro to compute one round
            // The shuffle is to duplicate the 64-bit value to both lanes.
            // Each half of the immediate is 0100. See the documentation of the
            // PSHUFD instruction.
            //
            // NOTE(review): the shuffle remark above does not match this macro, which obtains the
            // round constant through M4x64broadcast_load; it looks carried over from another variant.
            //
            #define DO_ROUND( a, b, c, d, t, r ) \
                t = W[r]; \
                t = _mm256_add_epi64( t, CSIGMA1YMM( W[r-5] ) ); \
                t = _mm256_add_epi64( t, W[r-8] ); \
                t = _mm256_add_epi64( t, CHYMM( W[r-5], W[r-6], W[r-7] ) ); \
                t = _mm256_add_epi64( t, M4x64broadcast_load( &SymCryptSha512K[r] )); \
                W[r-4] = _mm256_add_epi64( t, d ); \
                d = _mm256_add_epi64( t, CSIGMA0YMM( a ) ); \
                d = _mm256_add_epi64( d, MAJYMM( c, b, a ) );

            DO_ROUND( A, B, C, D, T, r );
            DO_ROUND( D, A, B, C, T, (r+1) );
            DO_ROUND( C, D, A, B, T, (r+2) );
            DO_ROUND( B, C, D, A, T, (r+3) );
            #undef DO_ROUND
        }

        // Feed-forward.  r is 80 here, so W[r-5 .. r-8] are the final e, f, g, h values.
        buf[3] = ha[7] = _mm256_add_epi64( buf[3], A );
        buf[2] = ha[6] = _mm256_add_epi64( buf[2], B );
        buf[1] = ha[5] = _mm256_add_epi64( buf[1], C );
        buf[0] = ha[4] = _mm256_add_epi64( buf[0], D );
        ha[3] = _mm256_add_epi64( ha[3], W[r-5] );
        ha[2] = _mm256_add_epi64( ha[2], W[r-6] );
        ha[1] = _mm256_add_epi64( ha[1], W[r-7] );
        ha[0] = _mm256_add_epi64( ha[0], W[r-8] );

        nBytes -= 128;
    }

    // Transpose back to one register per hash and store the four chaining states.
    YMM_TRANSPOSE_64( T0, T1, T2, T3, ha[7], ha[6], ha[5], ha[4] );
    _mm256_storeu_si256( (__m256i *)&pChain[0]->H[0], T0 );
    _mm256_storeu_si256( (__m256i *)&pChain[1]->H[0], T1 );
    _mm256_storeu_si256( (__m256i *)&pChain[2]->H[0], T2 );
    _mm256_storeu_si256( (__m256i *)&pChain[3]->H[0], T3 );

    YMM_TRANSPOSE_64( T0, T1, T2, T3, ha[3], ha[2], ha[1], ha[0] );
    _mm256_storeu_si256( (__m256i *)&pChain[0]->H[4], T0 );
    _mm256_storeu_si256( (__m256i *)&pChain[1]->H[4], T1 );
    _mm256_storeu_si256( (__m256i *)&pChain[2]->H[4], T2 );
    _mm256_storeu_si256( (__m256i *)&pChain[3]->H[4], T3 );

    _mm256_zeroupper();
}

#ifdef __clang__
#pragma clang attribute pop
#else
#pragma GCC pop_options
#endif

#endif // CPU_X86_X64
diff --git a/libs/symcrypt/lib/sha512Par.c b/libs/symcrypt/lib/sha512Par.c
new file mode 100644
index 00000000000..d85bdecbf63
--- /dev/null
+++ b/libs/symcrypt/lib/sha512Par.c
@@ -0,0 +1,798 @@
//
// Sha512Par.c
//
// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
//

//
// This module contains the routines to implement SHA-512/SHA-384 from FIPS 180-2 in parallel mode
//

#include "precomp.h"

extern SYMCRYPT_ALIGN_AT( 64 ) const UINT64 SymCryptSha512K[81];


//
// Not all CPU architectures support parallel code.
+// +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#define SUPPORT_PARALLEL 1 + +#define MIN_PARALLEL 2 +#define MAX_PARALLEL 4 + +#elif SYMCRYPT_CPU_ARM + +#define SUPPORT_PARALLEL 0 +//#define MIN_PARALLEL 3 +//#define MAX_PARALLEL 3 + +#else + +#define SUPPORT_PARALLEL 0 + +#endif + + +// +// ugly hack, there is no generic way to broadcast a 64-bit value between x86 & amd64 +// +#if SYMCRYPT_CPU_X86 +#define M2x64broadcast_load(_p) _mm_set_epi32( ((UINT32 *)(_p))[1], ((UINT32 *)(_p))[0], ((UINT32 *)(_p))[1], ((UINT32 *)(_p))[0] ) +#elif SYMCRYPT_CPU_AMD64 +#define M2x64broadcast_load(_p) _mm_shuffle_epi32( _mm_cvtsi64_si128( *(_p) ), 0x44 ) +#endif + +VOID +SYMCRYPT_CALL +SymCryptParallelSha512AppendBytes_serial( + _Inout_updates_( nPar ) PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pWork, + _In_range_(1, MAX_PARALLEL) SIZE_T nPar, + SIZE_T nBytes ); + +// +// Currently these are the generic implementations in terms of the single hash code. +// + +VOID +SYMCRYPT_CALL +SymCryptParallelSha512Init( + _Out_writes_( nStates ) PSYMCRYPT_SHA512_STATE pStates, + SIZE_T nStates ) +{ + SIZE_T i; + + for( i=0; i<nStates; i++ ) + { + SymCryptSha512Init( &pStates[i] ); + } +} + +VOID +SYMCRYPT_CALL +SymCryptParallelSha384Init( + _Out_writes_( nStates ) PSYMCRYPT_SHA384_STATE pStates, + SIZE_T nStates ) +{ + SIZE_T i; + + for( i=0; i<nStates; i++ ) + { + SymCryptSha384Init( &pStates[i] ); + } +} + +#if !SUPPORT_PARALLEL +// +// No parallel support on this CPU +// + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelSha512Process( + _Inout_updates_( nStates ) PSYMCRYPT_SHA512_STATE pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SymCryptParallelHashProcess_serial( SymCryptParallelSha512Algorithm, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch ); +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelSha384Process( 
+ _Inout_updates_( nStates ) PSYMCRYPT_SHA384_STATE pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SymCryptParallelHashProcess_serial( SymCryptParallelSha384Algorithm, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch ); +} + +#endif + + +#if SUPPORT_PARALLEL + + +// +// This function looks at a state and decides what to do. +// If it returns FALSE, then this state is done and no further processing is required. +// If it returns TRUE, the pbData/cbData have to be processed in parallel. +// This function is called again on the same state after the pbData/cbData have been processed. +// +// Internally, it keeps track of the next step to be taken for this state. +// the processingState keeps track of the next action to take. +// + + +BOOLEAN +SYMCRYPT_CALL +SymCryptParallelSha512Result1( + _In_ PCSYMCRYPT_PARALLEL_HASH pParHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState, + _Inout_ PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pScratch, + _Out_ BOOLEAN *pRes) +{ + UINT32 bytesInBuffer = pState->bytesInBuffer; + + UNREFERENCED_PARAMETER( pParHash ); + // + // Function is called when a Result is requested from a parallel hash state. + // Do the first step of the padding. 
+ // + pState->buffer[bytesInBuffer++] = 0x80; + SymCryptWipe( &pState->buffer[bytesInBuffer], SYMCRYPT_SHA512_INPUT_BLOCK_SIZE - bytesInBuffer ); + + pScratch->pbData = &pState->buffer[0]; + pScratch->cbData = SYMCRYPT_SHA512_INPUT_BLOCK_SIZE; + + if( bytesInBuffer > SYMCRYPT_SHA512_INPUT_BLOCK_SIZE - 16 ) + { + // We need 2 blocks for the padding + pScratch->processingState = STATE_RESULT2; + } else { + SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[SYMCRYPT_SHA512_INPUT_BLOCK_SIZE-16], (pState->dataLengthH << 3) + (pState->dataLengthL >> 61) ); + SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[SYMCRYPT_SHA512_INPUT_BLOCK_SIZE- 8], (pState->dataLengthL << 3) ); + pScratch->processingState = STATE_RESULT_DONE; + } + + *pRes = TRUE; // return value from the SetWork function + return TRUE; // Return from the SetWork function +} + + +BOOLEAN +SYMCRYPT_CALL +SymCryptParallelSha512Result2( + _In_ PCSYMCRYPT_PARALLEL_HASH pParHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState, + _Inout_ PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE pScratch, + _Out_ BOOLEAN *pRes) +{ + UNREFERENCED_PARAMETER( pParHash ); + // + // Called for the 2nd block of a long padding + // + SymCryptWipe( &pState->buffer[0], SYMCRYPT_SHA512_INPUT_BLOCK_SIZE ); + SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[SYMCRYPT_SHA512_INPUT_BLOCK_SIZE-16], (pState->dataLengthH << 3) + (pState->dataLengthL >> 61) ); + SYMCRYPT_STORE_MSBFIRST64( &pState->buffer[SYMCRYPT_SHA512_INPUT_BLOCK_SIZE- 8], (pState->dataLengthL << 3) ); + pScratch->pbData = &pState->buffer[0]; + pScratch->cbData = SYMCRYPT_SHA512_INPUT_BLOCK_SIZE; + pScratch->processingState = STATE_RESULT_DONE; + *pRes = TRUE; + return TRUE; +} + +VOID +SYMCRYPT_CALL +SymCryptParallelSha512ResultDone( + _In_ PCSYMCRYPT_PARALLEL_HASH pParHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState, + _In_ PCSYMRYPT_PARALLEL_HASH_OPERATION pOp) +{ + PSYMCRYPT_SHA512_STATE pSha512State = (PSYMCRYPT_SHA512_STATE) pState; + + UNREFERENCED_PARAMETER( pParHash ); + + SYMCRYPT_ASSERT( 
pOp->hashOperation == SYMCRYPT_HASH_OPERATION_RESULT ); + SYMCRYPT_ASSERT( pOp->cbBuffer == SYMCRYPT_SHA512_RESULT_SIZE ); + + SymCryptUint64ToMsbFirst( &pSha512State->chain.H[0], pOp->pbBuffer, 8 ); + SymCryptWipeKnownSize( pSha512State, sizeof( *pSha512State )); + SymCryptSha512Init( pSha512State ); +} + +VOID +SYMCRYPT_CALL +SymCryptParallelSha384ResultDone( + _In_ PCSYMCRYPT_PARALLEL_HASH pParHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState, + _In_ PCSYMRYPT_PARALLEL_HASH_OPERATION pOp) +{ + PSYMCRYPT_SHA384_STATE pSha384State = (PSYMCRYPT_SHA384_STATE) pState; + + UNREFERENCED_PARAMETER( pParHash ); + + SYMCRYPT_ASSERT( pOp->hashOperation == SYMCRYPT_HASH_OPERATION_RESULT ); + SYMCRYPT_ASSERT( pOp->cbBuffer == SYMCRYPT_SHA384_RESULT_SIZE ); + + SymCryptUint64ToMsbFirst( &pSha384State->chain.H[0], pOp->pbBuffer, 6 ); + SymCryptWipeKnownSize( pSha384State, sizeof( *pSha384State )); + SymCryptSha384Init( pSha384State ); +} + + +C_ASSERT( (SYMCRYPT_SIMD_ELEMENT_SIZE & (SYMCRYPT_SIMD_ELEMENT_SIZE - 1 )) == 0 ); // check that it is a power of 2 + + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelSha512Sha384Process( + _In_ PCSYMCRYPT_PARALLEL_HASH pParHash, + _Inout_ PVOID pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 maxParallel; + +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveState; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_AVX2 | SYMCRYPT_CPU_FEATURE_SSSE3 ) && SymCryptSaveYmm( &SaveState ) == SYMCRYPT_NO_ERROR ) + { + maxParallel = 4; + scError = SymCryptParallelHashProcess( pParHash, + pStates, + nStates, + pOperations, + nOperations, + pbScratch, + cbScratch, + maxParallel ); + + SymCryptRestoreYmm( &SaveState ); + } else if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_SSSE3 ) && SymCryptSaveXmm( 
&SaveState ) == SYMCRYPT_NO_ERROR ) + { + maxParallel = 2; + scError = SymCryptParallelHashProcess( pParHash, + pStates, + nStates, + pOperations, + nOperations, + pbScratch, + cbScratch, + maxParallel ); + SymCryptRestoreXmm( &SaveState ); + } else { + scError = SymCryptParallelHashProcess_serial( pParHash, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch ); + } + +#elif SYMCRYPT_CPU_ARM + maxParallel = MAX_PARALLEL; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON ) ) + { + scError = SymCryptParallelHashProcess( pParHash, + pStates, + nStates, + pOperations, + nOperations, + pbScratch, + cbScratch, + maxParallel ); + } else { + scError = SymCryptParallelHashProcess_serial( pParHash, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch ); + } +#else + scError = SymCryptParallelHashProcess_serial( pParHash, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch ); +#endif + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelSha512Process( + _Inout_updates_( nStates ) PSYMCRYPT_SHA512_STATE pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SymCryptParallelSha512Sha384Process( SymCryptParallelSha512Algorithm, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch ); +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptParallelSha384Process( + _Inout_updates_( nStates ) PSYMCRYPT_SHA384_STATE pStates, + SIZE_T nStates, + _Inout_updates_( nOperations ) PSYMCRYPT_PARALLEL_HASH_OPERATION pOperations, + SIZE_T nOperations, + _Out_writes_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch ) +{ + return SymCryptParallelSha512Sha384Process( SymCryptParallelSha384Algorithm, pStates, nStates, pOperations, nOperations, pbScratch, cbScratch ); +} + + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 +// +// Code that uses the XMM registers. 
+//
+
+#ifdef __clang__
+#pragma clang attribute push (__attribute__((target("ssse3"))), apply_to=function)
+#else
+#pragma GCC push_options
+#pragma GCC target("ssse3")
+#endif
+
+#define MAJXMM( x, y, z ) _mm_or_si128( _mm_and_si128( _mm_or_si128( z, y ), x ), _mm_and_si128( z, y ))
+#define CHXMM( x, y, z ) _mm_xor_si128( _mm_and_si128( _mm_xor_si128( z, y ), x ), z )
+
+#define CSIGMA0XMM( x ) \
+    _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \
+        _mm_slli_epi64(x,36) , _mm_srli_epi64(x, 28) ),\
+        _mm_slli_epi64(x,30) ), _mm_srli_epi64(x, 34) ),\
+        _mm_slli_epi64(x,25) ), _mm_srli_epi64(x, 39) )
+#define CSIGMA1XMM( x ) \
+    _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \
+        _mm_slli_epi64(x,50) , _mm_srli_epi64(x, 14) ),\
+        _mm_slli_epi64(x,46) ), _mm_srli_epi64(x, 18) ),\
+        _mm_slli_epi64(x,23) ), _mm_srli_epi64(x, 41) )
+#define LSIGMA0XMM( x ) \
+    _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \
+        _mm_slli_epi64(x,63) , _mm_srli_epi64(x, 1) ),\
+        _mm_slli_epi64(x,56) ), _mm_srli_epi64(x, 8) ),\
+        _mm_srli_epi64(x, 7) )
+#define LSIGMA1XMM( x ) \
+    _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( \
+        _mm_slli_epi64(x,45) , _mm_srli_epi64(x, 19) ),\
+        _mm_slli_epi64(x, 3) ), _mm_srli_epi64(x, 61) ),\
+        _mm_srli_epi64(x,6) )
+
+#define XMM_TRANSPOSE_64( _R0, _R1, _S0, _S1 ) \
+    {\
+        _R0 = _mm_unpacklo_epi64( _S0, _S1 );\
+        _R1 = _mm_unpackhi_epi64( _S0, _S1 );\
+    }
+
+VOID
+SYMCRYPT_CALL
+SymCryptParallelSha512AppendBlocks_xmm(
+    _Inout_updates_( 2 ) PSYMCRYPT_SHA512_CHAINING_STATE * pChain,
+    _Inout_updates_( 2 ) PCBYTE * ppByte,
+    SIZE_T nBytes,
+    _Out_writes_( PAR_SCRATCH_ELEMENTS_512 ) __m128i * pScratch )
+{
+    __m128i * buf = pScratch; // chaining state concatenated with the expanded input block
+    __m128i * W = &buf[4 + 8]; // W are the 80 words of the expanded input (the loops below index W[0..79])
+    __m128i * ha = &buf[4]; // initial state words, in order h, g, ..., b, a
+    __m128i A, B, C, D, T;
+    __m128i T0, T1;
+    const __m128i BYTE_REVERSE_64 = _mm_set_epi8( 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 );
+    int r;
+
+
+    //
+    // The chaining state can be unaligned on x86, so we use unaligned loads
+    // Each transpose puts pChain[0]'s word in the low 64-bit lane and pChain[1]'s word in the high lane.
+
+    T0 = _mm_loadu_si128( (__m128i *)&pChain[0]->H[0] );
+    T1 = _mm_loadu_si128( (__m128i *)&pChain[1]->H[0] );
+
+    XMM_TRANSPOSE_64( ha[7], ha[6], T0, T1 );
+
+    T0 = _mm_loadu_si128( (__m128i *)&pChain[0]->H[2] );
+    T1 = _mm_loadu_si128( (__m128i *)&pChain[1]->H[2] );
+    XMM_TRANSPOSE_64( ha[5], ha[4], T0, T1 );
+
+    T0 = _mm_loadu_si128( (__m128i *)&pChain[0]->H[4] );
+    T1 = _mm_loadu_si128( (__m128i *)&pChain[1]->H[4] );
+    XMM_TRANSPOSE_64( ha[3], ha[2], T0, T1 );
+
+    T0 = _mm_loadu_si128( (__m128i *)&pChain[0]->H[6] );
+    T1 = _mm_loadu_si128( (__m128i *)&pChain[1]->H[6] );
+    XMM_TRANSPOSE_64( ha[1], ha[0], T0, T1 );
+
+    buf[0] = ha[4];
+    buf[1] = ha[5];
+    buf[2] = ha[6];
+    buf[3] = ha[7];
+
+    while( nBytes >= 128 )
+    {
+
+        //
+        // Capture the input into W[0..15]
+        //
+        for( r=0; r<16; r += 2 )
+        {
+            T0 = _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *) ppByte[0] ), BYTE_REVERSE_64 ); ppByte[0] += 16;
+            T1 = _mm_shuffle_epi8( _mm_loadu_si128( (__m128i *) ppByte[1] ), BYTE_REVERSE_64 ); ppByte[1] += 16;
+
+            XMM_TRANSPOSE_64( W[r], W[r+1], T0, T1 );
+        }
+
+        //
+        // Expand the message
+        //
+        A = W[15];
+        B = W[14];
+        D = W[0];
+        for( r=16; r<80; r+= 2 )
+        {
+            // Loop invariant: A=W[r-1], B = W[r-2], D = W[r-16]
+
+            //
+            // Macro for one word of message expansion.
+            // Invariant:
+            // on entry: a = W[r-1], b = W[r-2], d = W[r-16]
+            // on exit: W[r] computed, a = W[r-1], b = W[r], c = W[r-15]
+            //
+            #define EXPAND( a, b, c, d, r ) \
+                c = W[r-15]; \
+                b = _mm_add_epi64( _mm_add_epi64( _mm_add_epi64( d, LSIGMA1XMM( b ) ), W[r-7] ), LSIGMA0XMM( c ) ); \
+                W[r] = b; \
+
+            EXPAND( A, B, C, D, r );
+            EXPAND( B, A, D, C, (r+1));
+
+            #undef EXPAND
+        }
+
+        A = ha[7];
+        B = ha[6];
+        C = ha[5];
+        D = ha[4];
+
+        for( r=0; r<80; r += 4 )
+        {
+            //
+            // Loop invariant:
+            // A, B, C, and D are the a,b,c,d values of the current state.
+            // W[r] is the next expanded message word to be processed.
+            // W[r-8 .. r-5] contain the current state words h, g, f, e.
+            //
+
+            //
+            // Macro to compute one round
+            // The shuffle is to duplicate the 64-bit value to both lanes.
+            // Each half of the immediate is 0100. See the documentation of the
+            // PSHUFD instruction.
+            //
+
+            #define DO_ROUND( a, b, c, d, t, r ) \
+                t = W[r]; \
+                t = _mm_add_epi64( t, CSIGMA1XMM( W[r-5] ) ); \
+                t = _mm_add_epi64( t, W[r-8] ); \
+                t = _mm_add_epi64( t, CHXMM( W[r-5], W[r-6], W[r-7] ) ); \
+                t = _mm_add_epi64( t, M2x64broadcast_load( &SymCryptSha512K[r] )); \
+                W[r-4] = _mm_add_epi64( t, d ); \
+                d = _mm_add_epi64( t, CSIGMA0XMM( a ) ); \
+                d = _mm_add_epi64( d, MAJXMM( c, b, a ) );
+
+            DO_ROUND( A, B, C, D, T, r );
+            DO_ROUND( D, A, B, C, T, (r+1) );
+            DO_ROUND( C, D, A, B, T, (r+2) );
+            DO_ROUND( B, C, D, A, T, (r+3) );
+            #undef DO_ROUND
+        }
+
+        buf[3] = ha[7] = _mm_add_epi64( buf[3], A );
+        buf[2] = ha[6] = _mm_add_epi64( buf[2], B );
+        buf[1] = ha[5] = _mm_add_epi64( buf[1], C );
+        buf[0] = ha[4] = _mm_add_epi64( buf[0], D );
+        ha[3] = _mm_add_epi64( ha[3], W[r-5] );
+        ha[2] = _mm_add_epi64( ha[2], W[r-6] );
+        ha[1] = _mm_add_epi64( ha[1], W[r-7] );
+        ha[0] = _mm_add_epi64( ha[0], W[r-8] );
+
+        nBytes -= 128;
+    }
+
+
+    XMM_TRANSPOSE_64( T0, T1, ha[7], ha[6] );
+    _mm_storeu_si128( (__m128i *)&pChain[0]->H[0], T0 );
+    _mm_storeu_si128( (__m128i *)&pChain[1]->H[0], T1 );
+
+    XMM_TRANSPOSE_64( T0, T1, ha[5], ha[4] );
+    _mm_storeu_si128( (__m128i *)&pChain[0]->H[2], T0 );
+    _mm_storeu_si128( (__m128i *)&pChain[1]->H[2], T1 );
+
+    XMM_TRANSPOSE_64( T0, T1, ha[3], ha[2] );
+    _mm_storeu_si128( (__m128i *)&pChain[0]->H[4], T0 );
+    _mm_storeu_si128( (__m128i *)&pChain[1]->H[4], T1 );
+
+    XMM_TRANSPOSE_64( T0, T1, ha[1], ha[0] );
+    _mm_storeu_si128( (__m128i *)&pChain[0]->H[6], T0 );
+    _mm_storeu_si128( (__m128i *)&pChain[1]->H[6], T1 );
+
+}
+
+#ifdef __clang__
+#pragma clang attribute pop
+#else
+#pragma GCC pop_options
+#endif
+
+#endif // CPU_X86_X64
+
+#if SYMCRYPT_CPU_ARM
+
+
+#endif // CPU_ARM
+
+// Serial fallback: runs the single-stream SHA-512 block function over the first nBytes of
+// each of the nPar work items, then advances that item's pbData / cbData by nBytes.
+VOID
+SYMCRYPT_CALL
+SymCryptParallelSha512AppendBytes_serial(
+    _Inout_updates_( nPar ) PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pWork,
+    _In_range_(1, MAX_PARALLEL) SIZE_T nPar,
+    SIZE_T nBytes )
+{
+    SIZE_T i;
+    SIZE_T tmp;
+
+    SYMCRYPT_ASSERT( nBytes % SYMCRYPT_SHA512_INPUT_BLOCK_SIZE == 0 );
+    SYMCRYPT_ASSERT( nPar >= 1 && nPar <= MAX_PARALLEL );
+
+    for( i=0; i < nPar; i++ )
+    {
+        SYMCRYPT_ASSERT( pWork[i]->cbData >= nBytes );
+#if SYMCRYPT_CPU_X86
+        //
+        // On X86 the Sha512 append blocks function saves the XMM registers again, which is not allowed at DISPATCH level.
+        // We call the internal function that assumes the XMM registers are already saved.
+        // This function is only called when we are doing parallel hashing, which means that at a minimum we have SSSE3 and
+        // the XMM registers are saved.
+        //
+        SymCryptSha512AppendBlocks_xmm( & ((PSYMCRYPT_SHA512_STATE)(pWork[i]->hashState))->chain, pWork[i]->pbData, nBytes, &tmp );
+#else
+        SymCryptSha512AppendBlocks( & ((PSYMCRYPT_SHA512_STATE)(pWork[i]->hashState))->chain, pWork[i]->pbData, nBytes, &tmp );
+#endif
+        pWork[i]->pbData += nBytes;
+        pWork[i]->cbData -= nBytes;
+    }
+    return;
+}
+
+VOID
+SYMCRYPT_CALL
+SymCryptParallelSha512Append(
+    _Inout_updates_( nPar ) PSYMCRYPT_PARALLEL_HASH_SCRATCH_STATE * pWork,
+    _In_range_(1, MAX_PARALLEL) SIZE_T nPar,
+    SIZE_T nBytes,
+    _Inout_updates_( SYMCRYPT_SIMD_ELEMENT_SIZE * PAR_SCRATCH_ELEMENTS_512 )
+    PBYTE pbSimdScratch,
+    SIZE_T cbSimdScratch )
+{
+    PSYMCRYPT_SHA512_CHAINING_STATE apChain[MAX_PARALLEL];
+    PCBYTE apData[MAX_PARALLEL];
+    SIZE_T i;
+    UINT32 maxParallel;
+
+    UNREFERENCED_PARAMETER( cbSimdScratch ); // not referenced on FRE builds
+    SYMCRYPT_ASSERT( cbSimdScratch >= PAR_SCRATCH_ELEMENTS_512 * SYMCRYPT_SIMD_ELEMENT_SIZE );
+    SYMCRYPT_ASSERT( ((SIZE_T)pbSimdScratch & (SYMCRYPT_SIMD_ELEMENT_SIZE - 1)) == 0 );
+
+    //
+    // Compute maxParallel; this is 2 if nPar <= 2, and 4 if nPar = 3,4.
+    // This is how many parameter sets we have to set up.
+    //
+#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64
+
+    maxParallel = (nPar + 1) & ~1;
+    SYMCRYPT_ASSERT( maxParallel == 2 || (maxParallel == 4 && SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_AVX2 )) );
+
+#elif SYMCRYPT_CPU_ARM
+
+    maxParallel = 2;
+
+#endif
+
+    SYMCRYPT_ASSERT( nPar >= 1 && nPar <= maxParallel );
+
+    if( nPar < MIN_PARALLEL )
+    {
+        SymCryptParallelSha512AppendBytes_serial( pWork, nPar, nBytes );
+
+        // Done with this function.
+        goto cleanup;
+    }
+
+    //
+    // Our parallel code expects exactly 2 or 4 parallel computations.
+    // We simply duplicate the first one if we get fewer parallel ones.
+    // That means we write the result multiple times, but it saves a lot of
+    // extra if()s in the main codeline.
+    //
+
+    i = 0;
+    while( i < nPar )
+    {
+        SYMCRYPT_ASSERT( pWork[i]->cbData >= nBytes );
+        apChain[i] = & ((PSYMCRYPT_SHA512_STATE)(pWork[i]->hashState))->chain;
+        apData[i] = pWork[i]->pbData;
+        pWork[i]->pbData += nBytes;
+        pWork[i]->cbData -= nBytes;
+        i++;
+    }
+
+    while( i < maxParallel )
+    {
+        apChain[i] = apChain[0];
+        apData[i] = apData[0];
+        i++;
+    }
+
+#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64
+    if( maxParallel == 4 )
+    {
+        SymCryptParallelSha512AppendBlocks_ymm( &apChain[0], &apData[0], nBytes, (PBYTE)((__m256i *)pbSimdScratch) );
+    } else {
+        SymCryptParallelSha512AppendBlocks_xmm( &apChain[0], &apData[0], nBytes, (__m128i *)pbSimdScratch );
+    }
+#elif SYMCRYPT_CPU_ARM
+    UNREFERENCED_PARAMETER( pbSimdScratch ); // NOTE(review): no block function is called on this branch although pbData/cbData were already advanced above; presumably SUPPORT_PARALLEL is 0 on ARM so this is never compiled - confirm
+    //SymCryptParallelSha512AppendBlocks_neon( &apChain[0], &apData[0], nBytes, (__n128 *) pbSimdScratch );
+#else
+#error Unknown CPU
+#endif
+
+cleanup:
+    ;// no cleanup at this moment.
+}
+
+
+#endif // SUPPORT_PARALLEL
+
+#if SUPPORT_PARALLEL
+
+const SYMCRYPT_PARALLEL_HASH SymCryptParallelSha512Algorithm_default = {
+    &SymCryptSha512Algorithm_default,
+    PAR_SCRATCH_ELEMENTS_512 * SYMCRYPT_SIMD_ELEMENT_SIZE,
+    &SymCryptParallelSha512Result1,
+    &SymCryptParallelSha512Result2,
+    &SymCryptParallelSha512ResultDone,
+    &SymCryptParallelSha512Append,
+};
+
+const SYMCRYPT_PARALLEL_HASH SymCryptParallelSha384Algorithm_default = {
+    &SymCryptSha384Algorithm_default,
+    PAR_SCRATCH_ELEMENTS_512 * SYMCRYPT_SIMD_ELEMENT_SIZE,
+    &SymCryptParallelSha512Result1,
+    &SymCryptParallelSha512Result2,
+    &SymCryptParallelSha384ResultDone,
+    &SymCryptParallelSha512Append,
+};
+
+#else
+
+//
+// For platforms that do not have a parallel hash implementation
+// we use this structure to provide the necessary data to the _serial
+// implementation of the function.
+//
+const SYMCRYPT_PARALLEL_HASH SymCryptParallelSha512Algorithm_default = {
+    &SymCryptSha512Algorithm_default,
+    PAR_SCRATCH_ELEMENTS_512 * SYMCRYPT_SIMD_ELEMENT_SIZE,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+};
+
+const SYMCRYPT_PARALLEL_HASH SymCryptParallelSha384Algorithm_default = {
+    &SymCryptSha384Algorithm_default,
+    PAR_SCRATCH_ELEMENTS_512 * SYMCRYPT_SIMD_ELEMENT_SIZE,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+};
+
+#endif
+
+const PCSYMCRYPT_PARALLEL_HASH SymCryptParallelSha384Algorithm = &SymCryptParallelSha384Algorithm_default;
+const PCSYMCRYPT_PARALLEL_HASH SymCryptParallelSha512Algorithm = &SymCryptParallelSha512Algorithm_default;
+
+// Self-tests: each queues one APPEND of SymCryptTestMsg3 and one RESULT per state, then compares every result with the known answer.
+#define N_SELFTEST_STATES 3 // Just enough to trigger YMM usage
+
+VOID
+SYMCRYPT_CALL
+SymCryptParallelSha384Selftest(void)
+{
+    SYMCRYPT_ERROR scError;
+    SYMCRYPT_SHA384_STATE states[N_SELFTEST_STATES];
+    BYTE result[N_SELFTEST_STATES][SYMCRYPT_SHA384_RESULT_SIZE];
+    SYMCRYPT_PARALLEL_HASH_OPERATION op[2*N_SELFTEST_STATES];
+    BYTE scratch[SYMCRYPT_PARALLEL_SHA384_FIXED_SCRATCH + N_SELFTEST_STATES * SYMCRYPT_PARALLEL_HASH_PER_STATE_SCRATCH];
+    int i;
+
+    SymCryptParallelSha384Init( &states[0], N_SELFTEST_STATES );
+
+    for( i=0; i<N_SELFTEST_STATES; i++ )
+    {
+        op[2*i ].iHash = i;
+        op[2*i ].hashOperation = SYMCRYPT_HASH_OPERATION_APPEND;
+        op[2*i ].pbBuffer = (PBYTE) SymCryptTestMsg3;
+        op[2*i ].cbBuffer = sizeof(SymCryptTestMsg3);
+        op[2*i + 1].iHash = i;
+        op[2*i + 1].hashOperation = SYMCRYPT_HASH_OPERATION_RESULT;
+        op[2*i + 1].pbBuffer = &result[i][0];
+        op[2*i + 1].cbBuffer = SYMCRYPT_SHA384_RESULT_SIZE;
+    }
+
+    scError = SymCryptParallelSha384Process( &states[0], N_SELFTEST_STATES, op, 2*N_SELFTEST_STATES, scratch, sizeof( scratch ) );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        SymCryptFatal( 'PS38' );
+    }
+
+    for( i=0; i<N_SELFTEST_STATES; i++ )
+    {
+        SymCryptInjectError( &result[i][0], SYMCRYPT_SHA384_RESULT_SIZE );
+
+        if( memcmp( &result[i][0], SymCryptSha384KATAnswer, SYMCRYPT_SHA384_RESULT_SIZE ) != 0 ) {
+            SymCryptFatal( 'PS38' );
+        }
+    }
+}
+
+VOID
+SYMCRYPT_CALL
+SymCryptParallelSha512Selftest(void)
+{
+    SYMCRYPT_ERROR scError;
+    SYMCRYPT_SHA512_STATE states[N_SELFTEST_STATES];
+    BYTE result[N_SELFTEST_STATES][SYMCRYPT_SHA512_RESULT_SIZE];
+    SYMCRYPT_PARALLEL_HASH_OPERATION op[2*N_SELFTEST_STATES];
+    BYTE scratch[SYMCRYPT_PARALLEL_SHA512_FIXED_SCRATCH + N_SELFTEST_STATES * SYMCRYPT_PARALLEL_HASH_PER_STATE_SCRATCH];
+    int i;
+
+    SymCryptParallelSha512Init( &states[0], N_SELFTEST_STATES );
+
+    for( i=0; i<N_SELFTEST_STATES; i++ )
+    {
+        op[2*i ].iHash = i;
+        op[2*i ].hashOperation = SYMCRYPT_HASH_OPERATION_APPEND;
+        op[2*i ].pbBuffer = (PBYTE) SymCryptTestMsg3;
+        op[2*i ].cbBuffer = sizeof(SymCryptTestMsg3);
+        op[2*i + 1].iHash = i;
+        op[2*i + 1].hashOperation = SYMCRYPT_HASH_OPERATION_RESULT;
+        op[2*i + 1].pbBuffer = &result[i][0];
+        op[2*i + 1].cbBuffer = SYMCRYPT_SHA512_RESULT_SIZE;
+    }
+
+    scError = SymCryptParallelSha512Process( &states[0], N_SELFTEST_STATES, op, 2*N_SELFTEST_STATES, scratch, sizeof( scratch ) );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        SymCryptFatal( 'PS51' );
+    }
+
+    for( i=0; i<N_SELFTEST_STATES; i++ )
+    {
+        SymCryptInjectError( &result[i][0], SYMCRYPT_SHA512_RESULT_SIZE );
+
+        if( memcmp( &result[i][0], SymCryptSha512KATAnswer, SYMCRYPT_SHA512_RESULT_SIZE ) != 0 ) {
+            SymCryptFatal( 'PS51' );
+        }
+    }
+}
diff --git a/libs/symcrypt/lib/shake.c b/libs/symcrypt/lib/shake.c
new file mode 100644
index 00000000000..1850bc7be04
--- /dev/null
+++ b/libs/symcrypt/lib/shake.c
@@ -0,0 +1,337 @@
+//
+// Shake.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+
+//
+// See the symcrypt.h file for documentation on what the various functions do.
+//
+
+
+
+//
+// SHAKE128
+//
+#define Alg Shake128
+#define ALG SHAKE128
+#define SYMCRYPT_SHAKEXXX_INPUT_BLOCK_SIZE SYMCRYPT_SHAKE128_INPUT_BLOCK_SIZE
+#define SYMCRYPT_SHAKEXXX_RESULT_SIZE SYMCRYPT_SHAKE128_RESULT_SIZE
+#include "shake_pattern.c"
+#undef SYMCRYPT_SHAKEXXX_RESULT_SIZE
+#undef SYMCRYPT_SHAKEXXX_INPUT_BLOCK_SIZE
+#undef ALG
+#undef Alg
+
+const SYMCRYPT_HASH SymCryptShake128HashAlgorithm_default = {
+    &SymCryptShake128Init,
+    &SymCryptShake128Append,
+    &SymCryptShake128Result,
+    NULL, // AppendBlocks function is not implemented for SHA-3
+    &SymCryptShake128StateCopy,
+    sizeof(SYMCRYPT_SHAKE128_STATE),
+    SYMCRYPT_SHAKE128_RESULT_SIZE,
+    SYMCRYPT_SHAKE128_INPUT_BLOCK_SIZE,
+    SYMCRYPT_FIELD_OFFSET(SYMCRYPT_SHAKE128_STATE, ks.state),
+    SYMCRYPT_FIELD_SIZE(SYMCRYPT_SHAKE128_STATE, ks.state),
+};
+
+const PCSYMCRYPT_HASH SymCryptShake128HashAlgorithm = &SymCryptShake128HashAlgorithm_default;
+
+static const BYTE shake128KATAnswer[SYMCRYPT_SHAKE128_RESULT_SIZE] = {
+    0x58, 0x81, 0x09, 0x2d, 0xd8, 0x18, 0xbf, 0x5c,
+    0xf8, 0xa3, 0xdd, 0xb7, 0x93, 0xfb, 0xcb, 0xa7,
+    0x40, 0x97, 0xd5, 0xc5, 0x26, 0xa6, 0xd3, 0x5f,
+    0x97, 0xb8, 0x33, 0x51, 0x94, 0x0f, 0x2c ,0xc8
+};
+
+VOID
+SYMCRYPT_CALL
+SymCryptShake128Selftest(void)
+{
+    BYTE result[SYMCRYPT_SHAKE128_RESULT_SIZE];
+
+    SymCryptShake128(SymCryptTestMsg3, sizeof(SymCryptTestMsg3), result, sizeof(result));
+
+    SymCryptInjectError(result, sizeof(result));
+
+    if (memcmp(result, shake128KATAnswer, sizeof(result)) != 0)
+    {
+        SymCryptFatal('shk1');
+    }
+}
+
+
+//
+// SHAKE256
+//
+#define Alg Shake256
+#define ALG SHAKE256
+#define SYMCRYPT_SHAKEXXX_INPUT_BLOCK_SIZE SYMCRYPT_SHAKE256_INPUT_BLOCK_SIZE
+#define SYMCRYPT_SHAKEXXX_RESULT_SIZE SYMCRYPT_SHAKE256_RESULT_SIZE
+#include "shake_pattern.c"
+#undef SYMCRYPT_SHAKEXXX_RESULT_SIZE
+#undef SYMCRYPT_SHAKEXXX_INPUT_BLOCK_SIZE
+#undef ALG
+#undef Alg
+
+const SYMCRYPT_HASH SymCryptShake256HashAlgorithm_default = {
+    &SymCryptShake256Init,
+    &SymCryptShake256Append,
+    &SymCryptShake256Result,
+    NULL, // AppendBlocks function is not implemented for SHA-3
+    &SymCryptShake256StateCopy,
+    sizeof(SYMCRYPT_SHAKE256_STATE),
+    SYMCRYPT_SHAKE256_RESULT_SIZE,
+    SYMCRYPT_SHAKE256_INPUT_BLOCK_SIZE,
+    SYMCRYPT_FIELD_OFFSET(SYMCRYPT_SHAKE256_STATE, ks.state),
+    SYMCRYPT_FIELD_SIZE(SYMCRYPT_SHAKE256_STATE, ks.state),
+};
+
+const PCSYMCRYPT_HASH SymCryptShake256HashAlgorithm = &SymCryptShake256HashAlgorithm_default;
+
+static const BYTE shake256KATAnswer[SYMCRYPT_SHAKE256_RESULT_SIZE] = {
+    0x48, 0x33, 0x66, 0x60, 0x13, 0x60, 0xa8, 0x77, 0x1c, 0x68, 0x63, 0x08, 0x0c, 0xc4, 0x11, 0x4d,
+    0x8d, 0xb4, 0x45, 0x30, 0xf8, 0xf1, 0xe1, 0xee, 0x4f, 0x94, 0xea, 0x37, 0xe7, 0x8b, 0x57, 0x39,
+    0xd5, 0xa1, 0x5b, 0xef, 0x18, 0x6a, 0x53, 0x86, 0xc7, 0x57, 0x44, 0xc0, 0x52, 0x7e, 0x1f, 0xaa,
+    0x9f, 0x87, 0x26, 0xe4, 0x62, 0xa1, 0x2a, 0x4f, 0xeb, 0x06, 0xbd, 0x88, 0x01, 0xe7, 0x51, 0xe4
+};
+
+VOID
+SYMCRYPT_CALL
+SymCryptShake256Selftest(void)
+{
+    BYTE result[SYMCRYPT_SHAKE256_RESULT_SIZE];
+
+    SymCryptShake256(SymCryptTestMsg3, sizeof(SymCryptTestMsg3), result, sizeof(result));
+
+    SymCryptInjectError(result, sizeof(result));
+
+    if (memcmp(result, shake256KATAnswer, sizeof(result)) != 0)
+    {
+        SymCryptFatal('shk2');
+    }
+}
+
+
+//
+// CSHAKE128
+//
+#define Alg CShake128
+#define ALG CSHAKE128
+#define SYMCRYPT_SHAKEXXX_INIT SymCryptShake128Init
+#define SYMCRYPT_SHAKEXXX_STATE SYMCRYPT_SHAKE128_STATE
+#define SYMCRYPT_CSHAKEXXX_INPUT_BLOCK_SIZE SYMCRYPT_CSHAKE128_INPUT_BLOCK_SIZE
+#define SYMCRYPT_CSHAKEXXX_RESULT_SIZE SYMCRYPT_CSHAKE128_RESULT_SIZE
+#include "cshake_pattern.c"
+#undef SYMCRYPT_CSHAKEXXX_RESULT_SIZE
+#undef SYMCRYPT_CSHAKEXXX_INPUT_BLOCK_SIZE
+#undef SYMCRYPT_SHAKEXXX_STATE
+#undef SYMCRYPT_SHAKEXXX_INIT
+#undef ALG
+#undef Alg
+
+
+static const BYTE cshake128KATAnswer[SYMCRYPT_CSHAKE128_RESULT_SIZE] = {
+    0x14, 0xe5, 0xdf, 0xf3, 0xae, 0xfd, 0xfe, 0x8e,
+    0xa6, 0xae, 0xed, 0xfd, 0x99, 0xe6, 0x84, 0x74,
+    0xbc, 0x61, 0xb9, 0xd6, 0x17, 0x4e, 0x9f, 0x4a,
+    0xe3, 0xbd, 0x87, 0xdf, 0x0e, 0xf2, 0x16, 0xdb,
+};
+
+VOID
+SYMCRYPT_CALL
+SymCryptCShake128Selftest(void)
+{
+    BYTE result[SYMCRYPT_CSHAKE128_RESULT_SIZE];
+    static const unsigned char Nstr[] = { 'N' };
+    static const unsigned char Sstr[] = { 'S' };
+
+    SymCryptCShake128( Nstr, sizeof(Nstr),
+                        Sstr, sizeof(Sstr),
+                        SymCryptTestMsg3, sizeof(SymCryptTestMsg3),
+                        result, sizeof(result));
+
+    SymCryptInjectError(result, sizeof(result));
+
+    if (memcmp(result, cshake128KATAnswer, sizeof(result)) != 0)
+    {
+        SymCryptFatal('cshk');
+    }
+}
+
+
+//
+// CSHAKE256
+//
+#define Alg CShake256
+#define ALG CSHAKE256
+#define SYMCRYPT_SHAKEXXX_INIT SymCryptShake256Init
+#define SYMCRYPT_SHAKEXXX_STATE SYMCRYPT_SHAKE256_STATE
+#define SYMCRYPT_CSHAKEXXX_INPUT_BLOCK_SIZE SYMCRYPT_CSHAKE256_INPUT_BLOCK_SIZE
+#define SYMCRYPT_CSHAKEXXX_RESULT_SIZE SYMCRYPT_CSHAKE256_RESULT_SIZE
+#include "cshake_pattern.c"
+#undef SYMCRYPT_CSHAKEXXX_RESULT_SIZE
+#undef SYMCRYPT_CSHAKEXXX_INPUT_BLOCK_SIZE
+#undef SYMCRYPT_SHAKEXXX_STATE
+#undef SYMCRYPT_SHAKEXXX_INIT
+#undef ALG
+#undef Alg
+
+
+static const BYTE cshake256KATAnswer[SYMCRYPT_CSHAKE256_RESULT_SIZE] = {
+    0x4d, 0xe8, 0x71, 0x6c, 0x4a, 0x16, 0x7e, 0x28, 0x2c, 0x18, 0xc5, 0x1e, 0xed, 0xa6, 0x00, 0xb8,
+    0x91, 0x92, 0x4f, 0xea, 0x2e, 0x20, 0x7f, 0x71, 0x2c, 0xfd, 0xe2, 0x95, 0xfd, 0x1c, 0x67, 0x32,
+    0x31, 0x49, 0x98, 0x23, 0xc0, 0x5e, 0x6a, 0xe3, 0x89, 0xad, 0x4d, 0xa2, 0x32, 0x9c, 0xc9, 0x2e,
+    0x0f, 0xd6, 0x90, 0xb9, 0xee, 0x91, 0x0e, 0x86, 0xf7, 0x1d, 0x03, 0x88, 0xb5, 0x95, 0x61, 0x95
+};
+
+VOID
+SYMCRYPT_CALL
+SymCryptCShake256Selftest(void)
+{
+    BYTE result[SYMCRYPT_CSHAKE256_RESULT_SIZE];
+    static const unsigned char Nstr[] = { 'N' };
+    static const unsigned char Sstr[] = { 'S' };
+
+    SymCryptCShake256(Nstr, sizeof(Nstr),
+                        Sstr, sizeof(Sstr),
+                        SymCryptTestMsg3, sizeof(SymCryptTestMsg3),
+                        result, sizeof(result));
+
+    SymCryptInjectError(result, sizeof(result));
+
+    if (memcmp(result, cshake256KATAnswer, sizeof(result)) != 0)
+    {
+        SymCryptFatal('cshk'); // NOTE(review): same fatal code as SymCryptCShake128Selftest, so the two failures are indistinguishable
+    }
+}
+
+//
+// CShake helper functions
+//
+
+//
+// SymCryptCShakeEncodeInputStrings
+// Appends the encoded block size, function-name string and customization string, then zero-pads to a block boundary.
+VOID
+SYMCRYPT_CALL
+SymCryptCShakeEncodeInputStrings(
+    _Inout_ PSYMCRYPT_KECCAK_STATE pState,
+    _In_reads_( cbFunctionNameString ) PCBYTE pbFunctionNameString,
+    SIZE_T cbFunctionNameString,
+    _In_reads_( cbCustomizationString ) PCBYTE pbCustomizationString,
+    SIZE_T cbCustomizationString)
+{
+    SYMCRYPT_ASSERT((cbFunctionNameString > 0) || (cbCustomizationString > 0));
+
+    // left_encode( inputBlockSize ) for byte_pad function
+    //
+    // SymCryptKeccakEncodeTimes8 function encodes 8 times the value passed to
+    // it. Here, we want the actual value of pState->inputBlockSize to be encoded,
+    // hence the division by 8.
+    SymCryptKeccakAppendEncodeTimes8(pState, pState->inputBlockSize / 8, TRUE);
+
+    SymCryptKeccakAppendEncodedString(pState, pbFunctionNameString, cbFunctionNameString);
+    SymCryptKeccakAppendEncodedString(pState, pbCustomizationString, cbCustomizationString);
+
+    // Appending of Customization String may have already called the permutation
+    // if the appended data is aligned to input block size, in which case the zero
+    // padding has been done.
+    if (pState->stateIndex != 0)
+    {
+        SymCryptKeccakZeroAppendBlock(pState);
+    }
+}
+
+//
+// SymCryptKeccakEncodeTimes8
+// Writes the minimal big-endian bytes of (uInput * 8) with a length byte before (left) or after (right); returns bytes written.
+SIZE_T
+SYMCRYPT_CALL
+SymCryptKeccakEncodeTimes8(
+    UINT64 uInput,
+    _Out_writes_(cbOutput) PBYTE pbOutput,
+    SIZE_T cbOutput,
+    BOOLEAN bLeftEncode)
+{
+    BYTE encoding[1 + sizeof(UINT64)];
+    SIZE_T ret = 0;
+
+    // longest encoding is 1 byte for length + 9 bytes for uInput * 8
+    SYMCRYPT_ASSERT(cbOutput >= (1 + sizeof(encoding)));
+    UNREFERENCED_PARAMETER(cbOutput);
+
+    //
+    // encoding[0] .. encoding[8] will contain (uInput * 8) in big endian form
+    encoding[0] = (BYTE)(uInput >> 61);
+    SYMCRYPT_STORE_MSBFIRST64(&encoding[1], uInput * 8);
+
+    SIZE_T length = 1; // number of bytes required to encode uInput
+    PCBYTE pbMsb = &encoding[sizeof(encoding) - 1]; // pointer to the most significant byte
+
+    // Locate the most significant non-zero byte
+    for (int i = 0; i < sizeof(encoding); i++) // NOTE(review): signed i compared with unsigned sizeof result
+    {
+        // Do not early terminate on the most significant byte; length == 1 doubles as the "not found yet" marker
+        if (encoding[i] != 0 && length == 1)
+        {
+            length = sizeof(encoding) - i;
+            pbMsb = &encoding[i];
+        }
+    }
+
+    ret = 1 + length;
+
+    if (bLeftEncode)
+    {
+        // length for left_encode
+        *pbOutput++ = (BYTE)length;
+    }
+
+    memcpy(pbOutput, pbMsb, length);
+
+    if(!bLeftEncode)
+    {
+        // length for right_encode
+        pbOutput[length] = (BYTE)length;
+    }
+
+    return ret; // total number of bytes written to pbOutput
+}
+
+//
+// SymCryptKeccakAppendEncodeTimes8
+// Encodes (uValue * 8) with SymCryptKeccakEncodeTimes8 into a local buffer and appends it to the Keccak state.
+VOID
+SYMCRYPT_CALL
+SymCryptKeccakAppendEncodeTimes8(
+    _Inout_ SYMCRYPT_KECCAK_STATE *pState,
+    UINT64 uValue,
+    BOOLEAN bLeftEncode)
+
+{
+    BYTE encoding[1 + (1 + sizeof(UINT64))];
+    SIZE_T ret;
+
+    ret = SymCryptKeccakEncodeTimes8(uValue, encoding, sizeof(encoding), bLeftEncode);
+
+    SymCryptKeccakAppend(pState, encoding, ret);
+}
+
+
+//
+// SymCryptKeccakAppendEncodedString
+// Appends the left-encoded bit length of the string (cbString * 8) followed by the string bytes.
+VOID
+SYMCRYPT_CALL
+SymCryptKeccakAppendEncodedString(
+    _Inout_ PSYMCRYPT_KECCAK_STATE pState,
+    _In_reads_(cbString) PCBYTE pbString,
+    SIZE_T cbString)
+{
+    SymCryptKeccakAppendEncodeTimes8(pState, cbString, TRUE);
+    SymCryptKeccakAppend(pState, pbString, cbString);
+}
diff --git a/libs/symcrypt/lib/shake_pattern.c b/libs/symcrypt/lib/shake_pattern.c
new file mode 100644
index 00000000000..59551a391f8
--- /dev/null
+++ b/libs/symcrypt/lib/shake_pattern.c
@@ -0,0 +1,111 @@
+//
+// shake_pattern.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#if 0
+#pragma makedep header
+#endif
+
+//
+// This source file implements SHAKE128 and SHAKE256
+//
+// See the symcrypt.h file for documentation on what the various functions do.
+//
+
+//
+// SymCryptShake
+// One-shot hash producing the default SYMCRYPT_SHAKEXXX_RESULT_SIZE bytes of output.
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxDefault(
+    _In_reads_( cbData ) PCBYTE pbData,
+    SIZE_T cbData,
+    _Out_writes_( SYMCRYPT_SHAKEXXX_RESULT_SIZE ) PBYTE pbResult)
+{
+    SYMCRYPT_Xxx(pbData, cbData, pbResult, SYMCRYPT_SHAKEXXX_RESULT_SIZE);
+}
+
+//
+// SymCryptShakeEx
+// One-shot hash with caller-chosen output length: init, append, then extract cbResult bytes with wipe.
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_Xxx(
+    _In_reads_( cbData ) PCBYTE pbData,
+    SIZE_T cbData,
+    _Out_writes_( cbResult ) PBYTE pbResult,
+    SIZE_T cbResult)
+{
+    SYMCRYPT_XXX_STATE state;
+
+    SYMCRYPT_XxxInit(&state);
+    SYMCRYPT_XxxAppend(&state, pbData, cbData);
+    SYMCRYPT_XxxExtract(&state, pbResult, cbResult, TRUE);
+}
+
+//
+// SymCryptShakeStateCopy
+// Checks the source magic, copies the whole state structure, then sets the magic on the destination.
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxStateCopy(_In_ const SYMCRYPT_XXX_STATE* pSrc, _Out_ SYMCRYPT_XXX_STATE* pDst)
+{
+    SYMCRYPT_CHECK_MAGIC(pSrc);
+    *pDst = *pSrc;
+    SYMCRYPT_SET_MAGIC(pDst);
+}
+
+//
+// SymCryptShakeInit
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxInit(_Out_ PSYMCRYPT_XXX_STATE pState)
+{
+    SymCryptKeccakInit(&pState->ks,
+                        SYMCRYPT_SHAKEXXX_INPUT_BLOCK_SIZE,
+                        SYMCRYPT_SHAKE_PADDING_VALUE);
+
+    SYMCRYPT_SET_MAGIC(pState);
+}
+
+//
+// SymCryptShakeAppend
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxAppend(
+    _Inout_ PSYMCRYPT_XXX_STATE pState,
+    _In_reads_(cbData) PCBYTE pbData,
+    SIZE_T cbData)
+{
+    SymCryptKeccakAppend(&pState->ks, pbData, cbData);
+}
+
+//
+// SymCryptShakeExtract
+//
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxExtract(
+    _Inout_ PSYMCRYPT_XXX_STATE pState,
+    _Out_writes_(cbResult) PBYTE pbResult,
+    SIZE_T cbResult,
+    BOOLEAN bWipe)
+{
+    SymCryptKeccakExtract(&pState->ks, pbResult, cbResult, bWipe);
+}
+
+//
+// SymCryptShakeResult
+// Extracts the default SYMCRYPT_SHAKEXXX_RESULT_SIZE bytes and passes bWipe = TRUE to the extract call.
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XxxResult(
+    _Inout_ PSYMCRYPT_XXX_STATE pState,
+    _Out_writes_(SYMCRYPT_SHAKEXXX_RESULT_SIZE) PBYTE pbResult)
+{
+    SymCryptKeccakExtract(&pState->ks, pbResult, SYMCRYPT_SHAKEXXX_RESULT_SIZE, TRUE);
+}
diff --git a/libs/symcrypt/lib/smallPrimes32.h b/libs/symcrypt/lib/smallPrimes32.h
new file mode 100644
index 00000000000..8afad4c9380
--- /dev/null
+++ b/libs/symcrypt/lib/smallPrimes32.h
@@ -0,0 +1,29 @@
+//
+// Parameters for trial division mechanism
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+// GENERATED FILE, DO NOT EDIT.
+//
+
+
+//
+// The primes are put into groups of consecutive primes (skipping 2, 3, 5, and 17).
+// Each group has a product less than SYMCRYPT_MAX_SMALL_PRIME_GROUP_PRODUCT which is
+// chosen to avoid overflows in the modular reduction computation.
+//
+
+typedef struct _SYMCRYPT_SMALL_PRIME_GROUPS_SPEC {
+    UINT16 nGroups; // # groups of this size
+    UINT8 nPrimes; // # primes in the group
+    UINT32 maxPrime; // largest prime in the last group
+} SYMCRYPT_SMALL_PRIME_GROUPS_SPEC;
+
+#define SYMCRYPT_MAX_SMALL_PRIME_GROUP_PRODUCT (0x1c71c71cU)
+
+const SYMCRYPT_SMALL_PRIME_GROUPS_SPEC g_SymCryptSmallPrimeGroupsSpec[] = {
+    { 1, 7, 31 },
+    { 1, 5, 53 },
+    { 5, 4, 151 },
+    { 34, 3, 787 },
+    { 1156, 2, 21841 },
+    { 0, 1, 0xffffffff },
+};
diff --git a/libs/symcrypt/lib/sp800_108.c b/libs/symcrypt/lib/sp800_108.c
new file mode 100644
index 00000000000..56690caa1a4
--- /dev/null
+++ b/libs/symcrypt/lib/sp800_108.c
@@ -0,0 +1,143 @@
+//
+// sp800_108.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+//
+// This module contains the routines to implement the SP800-108 CTR KDF function
+//
+// Output block i (i = 1, 2, ...) is MAC( key, i as 4 bytes MSB-first || Label || 0x00 || Context || output bit length as 4 bytes MSB-first ).
+
+#include "precomp.h"
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSp800_108Derive(
+    _In_ PCSYMCRYPT_SP800_108_EXPANDED_KEY pExpandedKey,
+    _In_reads_opt_(cbLabel) PCBYTE pbLabel,
+    SIZE_T cbLabel,
+    _In_reads_opt_(cbContext) PCBYTE pbContext,
+    SIZE_T cbContext,
+    _Out_writes_(cbResult) PBYTE pbResult,
+    SIZE_T cbResult)
+{
+    SYMCRYPT_MAC_STATE macState;
+    UINT32 iBlock;
+    SIZE_T bytes;
+    SIZE_T blockSize = pExpandedKey->macAlg->resultSize;
+    SIZE_T bytesRemaining = cbResult;
+    BYTE buf[4];
+    SYMCRYPT_ALIGN BYTE rbBlockResult[SYMCRYPT_MAC_MAX_RESULT_SIZE];
+
+    SYMCRYPT_ASSERT(
+        blockSize <= SYMCRYPT_MAC_MAX_RESULT_SIZE &&
+        bytesRemaining > 0 );
+
+    if( cbResult > UINT32_MAX/8 )
+    {
+        // SP800-108 requires the output size in bits to be encoded in a 32-bit value.
+        // cbResults that are too large are impossible.
+        return SYMCRYPT_INVALID_ARGUMENT;
+    }
+
+    iBlock = 0;
+    while( bytesRemaining > 0 )
+    {
+        iBlock += 1;
+        pExpandedKey->macAlg->initFunc ( &macState, &pExpandedKey->macKey);
+
+        //
+        // We append the pieces into the MAC function. This is inefficient but works always.
+        // If we need more speed for large outputs, we could use a fixed-size stack buffer to build the
+        // concatenation & do a single append. This reduces the # calls in the loop, but adds one memcpy to
+        // the parameters. For small output sizes this is probably a wash.
+        //
+
+        SYMCRYPT_STORE_MSBFIRST32( &buf[0], iBlock );
+        pExpandedKey->macAlg->appendFunc( &macState, &buf[0], 4 ); // block count encoded in 4 bytes
+
+        if( cbLabel != (SIZE_T) -1 )
+        {
+            //
+            // cbLabel == -1 signals a generic input in the Context field.
+            // In that case neither the label nor the zero separator byte is appended.
+            pExpandedKey->macAlg->appendFunc( &macState, pbLabel, cbLabel ); // label
+
+            buf[0] = 0;
+            pExpandedKey->macAlg->appendFunc( &macState, &buf[0], 1 ); // zero byte
+        }
+
+        pExpandedKey->macAlg->appendFunc( &macState, pbContext, cbContext); // Context
+
+        SYMCRYPT_STORE_MSBFIRST32( &buf[0], 8 * (UINT32)cbResult );
+        pExpandedKey->macAlg->appendFunc( &macState, &buf[0], 4 ); // output length, in bits
+
+        pExpandedKey->macAlg->resultFunc( &macState, rbBlockResult );
+
+        bytes = SYMCRYPT_MIN( bytesRemaining, blockSize );
+        memcpy( pbResult, rbBlockResult, bytes );
+        pbResult += bytes;
+        bytesRemaining -= bytes;
+    }
+
+    SymCryptWipeKnownSize( rbBlockResult, sizeof( rbBlockResult ) );
+    return SYMCRYPT_NO_ERROR;
+}
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSp800_108ExpandKey(
+    _Out_ PSYMCRYPT_SP800_108_EXPANDED_KEY pExpandedKey,
+    _In_ PCSYMCRYPT_MAC macAlgorithm,
+    _In_reads_(cbKey) PCBYTE pbKey,
+    SIZE_T cbKey )
+{
+    SYMCRYPT_ASSERT( macAlgorithm->expandedKeySize <= sizeof( pExpandedKey->macKey ) );
+
+    pExpandedKey->macAlg = macAlgorithm;
+    return macAlgorithm->expandKeyFunc(&pExpandedKey->macKey, pbKey, cbKey );
+}
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSp800_108(
+    PCSYMCRYPT_MAC macAlgorithm,
+    _In_reads_(cbKey) PCBYTE pbKey,
+    SIZE_T cbKey,
+    _In_reads_opt_(cbLabel) PCBYTE pbLabel,
+    SIZE_T cbLabel,
+    _In_reads_opt_(cbContext) PCBYTE pbContext,
+    SIZE_T cbContext,
+    _Out_writes_(cbResult) PBYTE pbResult,
+    SIZE_T cbResult)
+{
+    SYMCRYPT_SP800_108_EXPANDED_KEY key;
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    scError = SymCryptSp800_108ExpandKey( &key, macAlgorithm, pbKey, cbKey );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+    scError = SymCryptSp800_108Derive( &key, pbLabel, cbLabel, pbContext, cbContext, pbResult, cbResult );
+    if( scError != SYMCRYPT_NO_ERROR )
+    {
+        goto cleanup;
+    }
+
+cleanup:
+
+    SymCryptWipeKnownSize( &key, sizeof( key ) );
+
+    return scError;
+
+}
+
+
+//
+// Self tests are in sp800_108_*.c files
+// to
avoid pulling in SHA-1 when only SP800-108-SHA256 is used and
+// similar scenarios.
+//
diff --git a/libs/symcrypt/lib/sp800_108_hmacsha1.c b/libs/symcrypt/lib/sp800_108_hmacsha1.c
new file mode 100644
index 00000000000..691a6b2a73b
--- /dev/null
+++ b/libs/symcrypt/lib/sp800_108_hmacsha1.c
@@ -0,0 +1,39 @@
+//
+// sp800_108_hmacsha1.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+
+//
+// The SP800-108 SHA-1 test
+// This is in a separate module to avoid pulling in SHA-1 whenever we use SP800_108
+// It derives 8 bytes from fixed test inputs and calls SymCryptFatal if they differ from the known answer.
+
+static const BYTE sp800_108_sha1Answer[] =
+{
+    0xcf, 0x4b, 0xfe, 0x4f, 0x85, 0xa1, 0x0b, 0xad,
+};
+
+VOID
+SYMCRYPT_CALL
+SymCryptSp800_108_HmacSha1SelfTest(void)
+{
+    BYTE res[sizeof(sp800_108_sha1Answer)];
+
+    SymCryptSp800_108(
+        SymCryptHmacSha1Algorithm,
+        &SymCryptTestKey32[0], 8, // key
+        (PCBYTE)"Label", 5, // label
+        &SymCryptTestKey32[16], 16, // context
+        res,
+        sizeof(res)); // NOTE(review): the call's return value is not checked here
+
+    SymCryptInjectError( res, sizeof( res ) );
+
+    if (memcmp(res, sp800_108_sha1Answer, sizeof(res)) !=0)
+    {
+        SymCryptFatal('8108');
+    }
+}
diff --git a/libs/symcrypt/lib/sp800_108_hmacsha256.c b/libs/symcrypt/lib/sp800_108_hmacsha256.c
new file mode 100644
index 00000000000..69ecc59669c
--- /dev/null
+++ b/libs/symcrypt/lib/sp800_108_hmacsha256.c
@@ -0,0 +1,39 @@
+//
+// sp800_108_hmacsha256.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+
+//
+// The SP800-108 SHA-256 test
+// This is in a separate module to avoid pulling in SHA-256 whenever we use SP800_108
+// It derives 8 bytes from fixed test inputs and calls SymCryptFatal if they differ from the known answer.
+
+static const BYTE sp800_108_sha256Answer[] =
+{
+    0x00, 0x26, 0x4b, 0xbb, 0x14, 0x97, 0x40, 0x54,
+};
+
+VOID
+SYMCRYPT_CALL
+SymCryptSp800_108_HmacSha256SelfTest(void)
+{
+    BYTE res[sizeof(sp800_108_sha256Answer)];
+
+    SymCryptSp800_108(
+        SymCryptHmacSha256Algorithm,
+        &SymCryptTestKey32[0], 8, // key
+        (PCBYTE)"Label", 5, // label
+        &SymCryptTestKey32[16], 16, // context
+        res,
+        sizeof(res)); // NOTE(review): the call's return value is not checked here
+
+    SymCryptInjectError( res, sizeof( res ) );
+
+    if (memcmp(res, sp800_108_sha256Answer, sizeof(res)) !=0)
+    {
+        SymCryptFatal('8108');
+    }
+}
diff --git a/libs/symcrypt/lib/sp800_108_hmacsha512.c b/libs/symcrypt/lib/sp800_108_hmacsha512.c
new file mode 100644
index 00000000000..bbb9fa0fdae
--- /dev/null
+++ b/libs/symcrypt/lib/sp800_108_hmacsha512.c
@@ -0,0 +1,66 @@
+//
+// sp800_108_hmacsha512.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+
+//
+// The SP800-108 SHA-384 / SHA-512 self tests
+// This is in a separate module to avoid pulling in SHA-512 whenever we use SP800_108
+// Each test derives 8 bytes from fixed test inputs and calls SymCryptFatal if they differ from the known answer.
+
+static const BYTE sp800_108_sha384Answer[] =
+{
+    0xc7, 0x10, 0x27, 0x87, 0xd8, 0x96, 0xbc, 0x89,
+};
+
+VOID
+SYMCRYPT_CALL
+SymCryptSp800_108_HmacSha384SelfTest(void)
+{
+    BYTE res[sizeof(sp800_108_sha384Answer)];
+
+    SymCryptSp800_108(
+        SymCryptHmacSha384Algorithm,
+        &SymCryptTestKey32[0], 8, // key
+        (PCBYTE)"Label", 5, // label
+        &SymCryptTestKey32[16], 16, // context
+        res,
+        sizeof(res)); // NOTE(review): the call's return value is not checked here
+
+    SymCryptInjectError( res, sizeof( res ) );
+
+    if (memcmp(res, sp800_108_sha384Answer, sizeof(res)) !=0)
+    {
+        SymCryptFatal('8108');
+    }
+}
+
+static const BYTE sp800_108_sha512Answer[] =
+{
+    0xdb, 0x3a, 0x18, 0xd9, 0x6c, 0x4a, 0xd4, 0x1e,
+};
+
+VOID
+SYMCRYPT_CALL
+SymCryptSp800_108_HmacSha512SelfTest(void)
+{
+    BYTE res[sizeof(sp800_108_sha512Answer)];
+
+    SymCryptSp800_108(
+        SymCryptHmacSha512Algorithm,
+        &SymCryptTestKey32[0], 8, // key
+        (PCBYTE)"Label", 5, // label
+        &SymCryptTestKey32[16], 16, // context
+        res,
+        sizeof(res)); // NOTE(review): the call's return value is not checked here
+
+    SymCryptInjectError( res, sizeof( res ) );
+
+    if (memcmp(res, sp800_108_sha512Answer, sizeof(res)) !=0)
+    {
+        SymCryptFatal('8108');
+    }
+}
diff --git a/libs/symcrypt/lib/srtp_kdf.c b/libs/symcrypt/lib/srtp_kdf.c
new file mode 100644
index 00000000000..34c7f4837d7
--- /dev/null
+++ b/libs/symcrypt/lib/srtp_kdf.c
@@ -0,0 +1,175 @@
+//
+// srtp_kdf.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+//
+// This module implements SRTP-KDF specified in RFC 3711 Section 4.3.1.
+//
+
+#include "precomp.h"
+
+
+// The only salt length accepted by SymCryptSrtpKdfDerive: 112 bits, in bytes.
+#define SYMCRYPT_SRTP_KDF_SALT_SIZE (112 / 8)
+
+
+
+//
+// SymCryptSrtpKdfExpandKey
+//
+// Prepares pExpandedKey for SymCryptSrtpKdfDerive by running the encrypt-only AES
+// key expansion over (pbKey, cbKey).
+// Returns whatever SymCryptAesExpandKeyEncryptOnly returns.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSrtpKdfExpandKey(
+    _Out_ PSYMCRYPT_SRTPKDF_EXPANDED_KEY pExpandedKey,
+    _In_reads_(cbKey) PCBYTE pbKey,
+    SIZE_T cbKey)
+{
+    SYMCRYPT_ERROR scError;
+
+    scError = SymCryptAesExpandKeyEncryptOnly(&pExpandedKey->aesExpandedKey, pbKey, cbKey);
+
+    return scError;
+}
+
+
+//
+// SymCryptSrtpKdfDerive
+//
+// Writes cbOutput bytes of AES-CTR keystream to pbOutput. The initial counter block
+// is the salt, zero-padded to 16 bytes, with key_id XORed into bytes 6..13, where
+//      key_id = (label << uIndexWidth) | (uIndex DIV uKeyDerivationRate).
+//
+// - pExpandedKey:          key prepared by SymCryptSrtpKdfExpandKey.
+// - pbSalt, cbSalt:        salt; cbSalt must equal SYMCRYPT_SRTP_KDF_SALT_SIZE.
+// - uKeyDerivationRate:    0 or 2^i with 0 <= i <= 24.
+// - uIndex, uIndexWidth:   index and its width in bits; the width must be 0, 32 or 48,
+//                          and 0 is treated as 48.
+// - label:                 value placed above the index inside key_id.
+// - pbOutput, cbOutput:    output buffer; any length, not only multiples of 16.
+//
+// Returns SYMCRYPT_INVALID_ARGUMENT when an argument violates the rules above
+// (pbOutput is not touched in that case), SYMCRYPT_NO_ERROR otherwise.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSrtpKdfDerive(
+    _In_ PCSYMCRYPT_SRTPKDF_EXPANDED_KEY pExpandedKey,
+    _In_reads_(cbSalt) PCBYTE pbSalt,
+    SIZE_T cbSalt,
+    UINT32 uKeyDerivationRate,
+    UINT64 uIndex,
+    UINT32 uIndexWidth,
+    BYTE label,
+    _Out_writes_(cbOutput) PBYTE pbOutput,
+    SIZE_T cbOutput)
+{
+    BYTE abCounterBlock[16] = { 0 };
+    UINT32 cShift;
+    UINT64 uKeyId;
+    UINT64 uSaltWord;
+    PBYTE pbKeyIdPos;
+    SIZE_T cbTail;
+    SIZE_T cbWholeBlocks;
+
+    //
+    // RFC 3711 defines SRTP indices as 48 bits. SRTCP indices were first specified as
+    // 32-bit values and later widened to 48 bits by Errata ID 3712, so both widths are
+    // accepted (the test vectors use 32-bit SRTCP indices). A width of 0 selects the
+    // default of 48.
+    //
+    if (uIndexWidth == 0)
+    {
+        uIndexWidth = 48;
+    }
+
+    if (uIndexWidth != 48 && uIndexWidth != 32)
+    {
+        return SYMCRYPT_INVALID_ARGUMENT;
+    }
+
+    if (cbSalt != SYMCRYPT_SRTP_KDF_SALT_SIZE)
+    {
+        return SYMCRYPT_INVALID_ARGUMENT;
+    }
+
+    // The rate may not exceed 2^24 ...
+    if (uKeyDerivationRate > (1 << 24))
+    {
+        return SYMCRYPT_INVALID_ARGUMENT;
+    }
+
+    // ... and must be zero or a power of two (at most one bit set).
+    if ((uKeyDerivationRate & (uKeyDerivationRate - 1)) != 0)
+    {
+        return SYMCRYPT_INVALID_ARGUMENT;
+    }
+
+    // Counter block starts out as Salt || 0 0
+    memcpy(abCounterBlock, pbSalt, cbSalt);
+
+    // (uIndex DIV uKeyDerivationRate) is a right shift by log2(rate), because the rate
+    // is a power of two. A rate of zero must make the quotient zero; shifting by 48,
+    // the largest index width, achieves that.
+    cShift = 48;
+    if (uKeyDerivationRate != 0)
+    {
+        cShift = 0;
+        while ((uKeyDerivationRate >> cShift) != 1)
+        {
+            cShift++;
+        }
+    }
+
+    uKeyId = ((UINT64)label << uIndexWidth) | (uIndex >> cShift);
+
+    //
+    // XOR key_id into the last 8 bytes of the salt:
+    //
+    //      S0 .. S5 | S6 ........ S13 | 0 0
+    //               |     key_id      |
+    //
+    // The loaded word is held in a variable so that the store macro is handed a
+    // plain value.
+    //
+    pbKeyIdPos = &abCounterBlock[SYMCRYPT_SRTP_KDF_SALT_SIZE - sizeof(uKeyId)];
+    uSaltWord = SYMCRYPT_LOAD_MSBFIRST64(pbKeyIdPos);
+    SYMCRYPT_STORE_MSBFIRST64(pbKeyIdPos, uSaltWord ^ uKeyId);
+
+    //
+    // pbOutput is written twice here, which breaks the read-once/write-once rule:
+    // first it is wiped, so that encrypting it in place yields the raw keystream,
+    // then SymCryptAesCtrMsb64 overwrites it with that keystream.
+    // Someone modifying pbOutput between the two writes learns nothing; it only
+    // flips the corresponding bits of the correct output.
+    //
+    SymCryptWipe(pbOutput, cbOutput);
+
+    cbTail = cbOutput & 0xf;
+    cbWholeBlocks = cbOutput - cbTail;
+
+    SymCryptAesCtrMsb64(&pExpandedKey->aesExpandedKey, abCounterBlock, pbOutput, pbOutput, cbWholeBlocks);
+
+    // SymCryptAesCtrMsb64 only processes full blocks. For a trailing partial block,
+    // one more keystream block goes to a local buffer and only the needed bytes are
+    // copied out.
+    // NOTE(review): this relies on the call above having advanced abCounterBlock in
+    // place - confirm against the SymCryptAesCtrMsb64 documentation.
+    if (cbTail != 0)
+    {
+        BYTE abLastBlock[16] = { 0 };
+
+        SymCryptAesCtrMsb64(&pExpandedKey->aesExpandedKey, abCounterBlock, abLastBlock, abLastBlock, sizeof(abLastBlock));
+
+        memcpy(pbOutput + cbWholeBlocks, abLastBlock, cbTail);
+
+        SymCryptWipeKnownSize(abLastBlock, sizeof(abLastBlock));
+    }
+
+    return SYMCRYPT_NO_ERROR;
+}
+
+
+//
+// SymCryptSrtpKdf
+//
+// One-shot form: expands (pbKey, cbKey), derives cbOutput bytes into pbOutput with
+// SymCryptSrtpKdfDerive, and wipes the local expanded key before returning.
+// Returns the key-expansion error if expansion fails, otherwise the result of the
+// derive call.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSrtpKdf(
+    _In_reads_(cbKey) PCBYTE pbKey,
+    SIZE_T cbKey,
+    _In_reads_(cbSalt) PCBYTE pbSalt,
+    SIZE_T cbSalt,
+    UINT32 uKeyDerivationRate,
+    UINT64 uIndex,
+    UINT32 uIndexWidth,
+    BYTE label,
+    _Out_writes_(cbOutput) PBYTE pbOutput,
+    SIZE_T cbOutput)
+{
+    SYMCRYPT_SRTPKDF_EXPANDED_KEY srtpKey;
+    SYMCRYPT_ERROR scError;
+
+    scError = SymCryptSrtpKdfExpandKey(&srtpKey, pbKey, cbKey);
+
+    if (scError == SYMCRYPT_NO_ERROR)
+    {
+        scError = SymCryptSrtpKdfDerive(&srtpKey,
+                                        pbSalt, cbSalt,
+                                        uKeyDerivationRate,
+                                        uIndex, uIndexWidth,
+                                        label,
+                                        pbOutput, cbOutput);
+    }
+
+    // The expanded key is wiped on the failure path as well.
+    SymCryptWipeKnownSize(&srtpKey, sizeof(srtpKey));
+
+    return scError;
+}
diff --git a/libs/symcrypt/lib/ssh_kdf.c b/libs/symcrypt/lib/ssh_kdf.c
new file mode 100644
index 00000000000..b1d961107d0
--- /dev/null
+++ b/libs/symcrypt/lib/ssh_kdf.c
@@ -0,0 +1,122 @@
+//
+// ssh_kdf.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+//
+// This module implements SSH-KDF specified in RFC 4253 Section 7.2.
+//
+
+#include "precomp.h"
+
+
+
+//
+// SymCryptSshKdfExpandKey
+//
+// Records pHashFunc in pExpandedKey and stores a hash state that has already
+// absorbed the key (pbKey, cbKey), so that each derive call can start from it.
+// Always returns SYMCRYPT_NO_ERROR.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSshKdfExpandKey(
+    _Out_ PSYMCRYPT_SSHKDF_EXPANDED_KEY pExpandedKey,
+    _In_ PCSYMCRYPT_HASH pHashFunc,
+    _In_reads_(cbKey) PCBYTE pbKey,
+    SIZE_T cbKey)
+{
+    SymCryptHashInit(pHashFunc, &pExpandedKey->hashState);
+    SymCryptHashAppend(pHashFunc, &pExpandedKey->hashState, pbKey, cbKey);
+
+    pExpandedKey->pHashFunc = pHashFunc;
+
+    return SYMCRYPT_NO_ERROR;
+}
+
+//
+// SymCryptSshKdfDerive
+//
+// Fills pbOutput with cbOutput bytes, produced one hash result at a time:
+//      K1 = HASH(K || H || label || session_id)
+//      Ki = HASH(K || H || K1 || ... || K(i-1))        for i > 1
+// where K is the key absorbed by SymCryptSshKdfExpandKey and H is
+// (pbHashValue, cbHashValue). The last block is truncated to fit.
+//
+// Always returns SYMCRYPT_NO_ERROR. Nothing is hashed or written when cbOutput is 0.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSshKdfDerive(
+    _In_ PCSYMCRYPT_SSHKDF_EXPANDED_KEY pExpandedKey,
+    _In_reads_(cbHashValue) PCBYTE pbHashValue,
+    SIZE_T cbHashValue,
+    BYTE label,
+    _In_reads_(cbSessionId) PCBYTE pbSessionId,
+    SIZE_T cbSessionId,
+    _Inout_updates_(cbOutput) PBYTE pbOutput,
+    SIZE_T cbOutput)
+{
+    SYMCRYPT_HASH_STATE workState;
+    PCSYMCRYPT_HASH pHash = pExpandedKey->pHashFunc;
+    SIZE_T cbDigest = SymCryptHashResultSize(pHash);
+    SIZE_T cbDone = 0;          // bytes of pbOutput produced so far
+    SIZE_T cbStep;
+
+    while (cbOutput != 0)
+    {
+        // Start every block from the saved state, then add H: state holds (K || H)
+        SymCryptHashStateCopy(pHash, &pExpandedKey->hashState, &workState);
+        SymCryptHashAppend(pHash, &workState, pbHashValue, cbHashValue);
+
+        if (cbDone != 0)
+        {
+            // Later blocks hash everything generated so far: (K || H || K1 .. Ki).
+            //
+            // This breaks the read-once/write-once rule: the bytes read here from
+            // pbOutput were written by SymCryptHashResult() in an earlier iteration.
+            // Tampering with them in between only disturbs the hash output in an
+            // uncontrolled way and cannot be used to learn about the secret key.
+            SymCryptHashAppend(pHash, &workState, pbOutput, cbDone);
+        }
+        else
+        {
+            // Only the first block includes the label and the session ID
+            SymCryptHashAppend(pHash, &workState, &label, 1);
+            SymCryptHashAppend(pHash, &workState, pbSessionId, cbSessionId);
+        }
+
+        // The remaining length is passed as the result size; the amount accounted
+        // for below is capped at the digest size.
+        SymCryptHashResult(pHash, &workState, pbOutput + cbDone, cbOutput);
+
+        cbStep = SYMCRYPT_MIN(cbOutput, cbDigest);
+
+        cbDone += cbStep;
+        cbOutput -= cbStep;
+    }
+
+    return SYMCRYPT_NO_ERROR;
+}
+
+
+//
+// SymCryptSshKdf
+//
+// One-shot form: absorbs the key with SymCryptSshKdfExpandKey, derives cbOutput
+// bytes into pbOutput with SymCryptSshKdfDerive, and wipes the local expanded key
+// before returning. Returns the first error reported by either step.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSshKdf(
+    _In_ PCSYMCRYPT_HASH pHashFunc,
+    _In_reads_(cbKey) PCBYTE pbKey,
+    SIZE_T cbKey,
+    _In_reads_(cbHashValue) PCBYTE pbHashValue,
+    SIZE_T cbHashValue,
+    BYTE label,
+    _In_reads_(cbSessionId) PCBYTE pbSessionId,
+    SIZE_T cbSessionId,
+    _Out_writes_(cbOutput) PBYTE pbOutput,
+    SIZE_T cbOutput)
+{
+    SYMCRYPT_SSHKDF_EXPANDED_KEY sshKey;
+    SYMCRYPT_ERROR scError;
+
+    scError = SymCryptSshKdfExpandKey(&sshKey, pHashFunc, pbKey, cbKey);
+
+    if (scError == SYMCRYPT_NO_ERROR)
+    {
+        scError = SymCryptSshKdfDerive(&sshKey,
+                                       pbHashValue, cbHashValue,
+                                       label,
+                                       pbSessionId, cbSessionId,
+                                       pbOutput, cbOutput);
+    }
+
+    // The expanded key is wiped on the failure path as well.
+    SymCryptWipeKnownSize(&sshKey, sizeof(sshKey));
+
+    return scError;
+}
diff --git a/libs/symcrypt/lib/ssh_kdf_sha256.c b/libs/symcrypt/lib/ssh_kdf_sha256.c
new file mode 100644
index 00000000000..f324dda3ae2
--- /dev/null
+++ b/libs/symcrypt/lib/ssh_kdf_sha256.c
@@ -0,0 +1,65 @@
+//
+// ssh_kdf_sha256.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+
+
+
+// Key input (K) of the SHA-256 known-answer test.
+static const BYTE pbKey[] =
+{
+    0x00, 0x00, 0x00, 0x81, 0x00, 0x87, 0x5c, 0x55, 0x1c, 0xef, 0x52, 0x6a, 0x4a, 0x8b, 0xe1, 0xa7,
+    0xdf, 0x27, 0xe9, 0xed, 0x35, 0x4b, 0xac, 0x9a, 0xfb, 0x71, 0xf5, 0x3d, 0xba, 0xe9, 0x05, 0x67,
+    0x9d, 0x14, 0xf9, 0xfa, 0xf2, 0x46, 0x9c, 0x53, 0x45, 0x7c, 0xf8, 0x0a, 0x36, 0x6b, 0xe2, 0x78,
+    0x96, 0x5b, 0xa6, 0x25, 0x52, 0x76, 0xca, 0x2d, 0x9f, 0x4a, 0x97, 0xd2, 0x71, 0xf7, 0x1e, 0x50,
+    0xd8, 0xa9, 0xec, 0x46, 0x25, 0x3a, 0x6a, 0x90, 0x6a, 0xc2, 0xc5, 0xe4, 0xf4, 0x8b, 0x27, 0xa6,
+    0x3c, 0xe0, 0x8d, 0x80, 0x39, 0x0a, 0x49, 0x2a, 0xa4, 0x3b, 0xad, 0x9d, 0x88, 0x2c, 0xca, 0xc2,
+    0x3d, 0xac, 0x88, 0xbc, 0xad, 0xa4, 0xb4, 0xd4, 0x26, 0xa3, 0x62, 0x08, 0x3d, 0xab, 0x65, 0x69,
+    0xc5, 0x4c, 0x22, 0x4d, 0xd2, 0xd8, 0x76, 0x43, 0xaa, 0x22, 0x76, 0x93, 0xe1, 0x41, 0xad, 0x16,
+    0x30, 0xce, 0x13, 0x14, 0x4e
+};
+
+// Hash value input (H) of the test.
+static const BYTE pbHash[] =
+{
+    0x0e, 0x68, 0x3f, 0xc8, 0xa9, 0xed, 0x7c, 0x2f, 0xf0, 0x2d, 0xef, 0x23, 0xb2, 0x74, 0x5e, 0xbc,
+    0x99, 0xb2, 0x67, 0xda, 0xa8, 0x6a, 0x4a, 0xa7, 0x69, 0x72, 0x39, 0x08, 0x82, 0x53, 0xf6, 0x42
+};
+
+// Session ID input of the test; in this vector it is identical to pbHash.
+static const BYTE pbSessionId[] =
+{
+    0x0e, 0x68, 0x3f, 0xc8, 0xa9, 0xed, 0x7c, 0x2f, 0xf0, 0x2d, 0xef, 0x23, 0xb2, 0x74, 0x5e, 0xbc,
+    0x99, 0xb2, 0x67, 0xda, 0xa8, 0x6a, 0x4a, 0xa7, 0x69, 0x72, 0x39, 0x08, 0x82, 0x53, 0xf6, 0x42
+};
+
+static const BYTE label = SYMCRYPT_SSHKDF_ENCRYPTION_KEY_CLIENT_TO_SERVER;
+
+// Expected 24-byte output for the inputs above.
+static const BYTE pbResult[] =
+{
+    0x4a, 0x63, 0x14, 0xd2, 0xf7, 0x51, 0x1b, 0xf8, 0x8f, 0xad, 0x39, 0xfb, 0x68, 0x92, 0xf3, 0xf2, 0x18, 0xca, 0xfd, 0x53, 0x0e, 0x72, 0xfe, 0x43
+};
+
+//
+// SymCryptSshKdfSha256SelfTest
+//
+// Runs SSH-KDF with SymCryptSha256Algorithm over the fixed inputs above and calls
+// SymCryptFatal('sshk') when the derived bytes differ from pbResult.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptSshKdfSha256SelfTest(void)
+{
+    SYMCRYPT_SSHKDF_EXPANDED_KEY sshKey;
+    SYMCRYPT_ALIGN BYTE abDerived[sizeof(pbResult)];
+
+    SymCryptSshKdfExpandKey(&sshKey, SymCryptSha256Algorithm, pbKey, sizeof(pbKey));
+
+    SymCryptSshKdfDerive(
+        &sshKey,
+        pbHash, sizeof(pbHash),
+        label,
+        pbSessionId, sizeof(pbSessionId),
+        abDerived, sizeof(abDerived));
+
+    // NOTE(review): presumably a fault-injection hook for test builds - confirm.
+    SymCryptInjectError(abDerived, sizeof(abDerived));
+
+    if (memcmp(abDerived, pbResult, sizeof(pbResult)) == 0)
+    {
+        return;
+    }
+
+    SymCryptFatal('sshk');
+}
diff --git a/libs/symcrypt/lib/ssh_kdf_sha512.c b/libs/symcrypt/lib/ssh_kdf_sha512.c
new file mode 100644
index 00000000000..909403753c1
--- /dev/null
+++ b/libs/symcrypt/lib/ssh_kdf_sha512.c
@@ -0,0 +1,70 @@
+//
+// ssh_kdf_sha512.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+
+
+
+// Key input (K) of the SHA-512 known-answer test.
+static const BYTE pbKey[] =
+{
+    0x00, 0x00, 0x00, 0x80, 0x57, 0x53, 0x08, 0xca, 0x39, 0x57, 0x98, 0xbb, 0x21, 0xec, 0x54, 0x38,
+    0xc4, 0x6a, 0x88, 0xff, 0xa3, 0xf7, 0xf7, 0x67, 0x1c, 0x06, 0xf9, 0x24, 0xab, 0xf7, 0xc3, 0xcf,
+    0xb4, 0x6c, 0x78, 0xc0, 0x25, 0x59, 0x6e, 0x4a, 0xba, 0x50, 0xc3, 0x27, 0x10, 0x89, 0x18, 0x4a,
+    0x44, 0x7a, 0x57, 0x1a, 0xbb, 0x7f, 0x4a, 0x1b, 0x1c, 0x41, 0xf5, 0xd5, 0xca, 0x80, 0x62, 0x94,
+    0x0d, 0x43, 0x69, 0x77, 0x85, 0x89, 0xfd, 0xe8, 0x1a, 0x71, 0xb2, 0x22, 0x8f, 0x01, 0x8c, 0x4c,
+    0x83, 0x6c, 0xf3, 0x89, 0xf8, 0x54, 0xf8, 0x6d, 0xe7, 0x1a, 0x68, 0xb1, 0x69, 0x3f, 0xe8, 0xff,
+    0xa1, 0xc5, 0x9c, 0xe7, 0xe9, 0xf9, 0x22, 0x3d, 0xeb, 0xad, 0xa2, 0x56, 0x6d, 0x2b, 0x0e, 0x56,
+    0x78, 0xa4, 0x8b, 0xfb, 0x53, 0x0e, 0x7b, 0xee, 0x42, 0xbd, 0x2a, 0xc7, 0x30, 0x4a, 0x0a, 0x5a,
+    0xe3, 0x39, 0xa2, 0xcd
+};
+
+// Hash value input (H) of the test.
+static const BYTE pbHash[] =
+{
+    0xa4, 0x12, 0x5a, 0xa9, 0x89, 0x80, 0x92, 0xca, 0x50, 0xc3, 0xc1, 0x63, 0x1c, 0x03, 0xdc, 0xbc,
+    0x9d, 0xf9, 0x5c, 0xeb, 0xb4, 0x09, 0x88, 0x1e, 0x58, 0x01, 0x08, 0xb6, 0xcc, 0x47, 0x04, 0xb7,
+    0x6c, 0xc7, 0x7b, 0x87, 0x95, 0xfd, 0x59, 0x40, 0x56, 0x1e, 0x32, 0x24, 0xcc, 0x75, 0x84, 0x85,
+    0x18, 0x99, 0x2b, 0xd8, 0xd9, 0xb7, 0x0f, 0xe0, 0xfc, 0x97, 0x7a, 0x47, 0x60, 0x63, 0xc8, 0xbf
+};
+
+// Session ID input of the test; in this vector it is identical to pbHash.
+static const BYTE pbSessionId[] =
+{
+    0xa4, 0x12, 0x5a, 0xa9, 0x89, 0x80, 0x92, 0xca, 0x50, 0xc3, 0xc1, 0x63, 0x1c, 0x03, 0xdc, 0xbc,
+    0x9d, 0xf9, 0x5c, 0xeb, 0xb4, 0x09, 0x88, 0x1e, 0x58, 0x01, 0x08, 0xb6, 0xcc, 0x47, 0x04, 0xb7,
+    0x6c, 0xc7, 0x7b, 0x87, 0x95, 0xfd, 0x59, 0x40, 0x56, 0x1e, 0x32, 0x24, 0xcc, 0x75, 0x84, 0x85,
+    0x18, 0x99, 0x2b, 0xd8, 0xd9, 0xb7, 0x0f, 0xe0, 0xfc, 0x97, 0x7a, 0x47, 0x60, 0x63, 0xc8, 0xbf
+};
+
+static const BYTE label = SYMCRYPT_SSHKDF_ENCRYPTION_KEY_CLIENT_TO_SERVER;
+
+// Expected 24-byte output for the inputs above.
+static const BYTE pbResult[] =
+{
+    0x7e, 0x4a, 0x72, 0x1f, 0xb7, 0x37, 0x9e, 0xbb, 0x42, 0x33, 0x06, 0x46, 0x4d, 0x57, 0xdb, 0x46,
+    0xaf, 0xa3, 0xcc, 0xa1, 0x0a, 0x1d, 0x7f, 0xeb
+};
+
+//
+// SymCryptSshKdfSha512SelfTest
+//
+// Runs SSH-KDF with SymCryptSha512Algorithm over the fixed inputs above and calls
+// SymCryptFatal('sshk') when the derived bytes differ from pbResult.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptSshKdfSha512SelfTest(void)
+{
+    SYMCRYPT_SSHKDF_EXPANDED_KEY sshKey;
+    SYMCRYPT_ALIGN BYTE abDerived[sizeof(pbResult)];
+
+    SymCryptSshKdfExpandKey(&sshKey, SymCryptSha512Algorithm, pbKey, sizeof(pbKey));
+
+    SymCryptSshKdfDerive(
+        &sshKey,
+        pbHash, sizeof(pbHash),
+        label,
+        pbSessionId, sizeof(pbSessionId),
+        abDerived, sizeof(abDerived));
+
+    // NOTE(review): presumably a fault-injection hook for test builds - confirm.
+    SymCryptInjectError(abDerived, sizeof(abDerived));
+
+    if (memcmp(abDerived, pbResult, sizeof(pbResult)) == 0)
+    {
+        return;
+    }
+
+    SymCryptFatal('sshk');
+}
diff --git a/libs/symcrypt/lib/sskdf.c b/libs/symcrypt/lib/sskdf.c
new file mode 100644
index 00000000000..65aebaf24dd
--- /dev/null
+++ b/libs/symcrypt/lib/sskdf.c
@@ -0,0 +1,266 @@
+//
+// sskdf.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+//
+// This module implements Single-Step KDF as specified in SP800-56C section 4.
+//
+
+#include "precomp.h"
+
+//
+// See the symcrypt.h file for documentation on what the various functions do.
+//
+
+
+// Lengths in bytes of the all-zero salt used when the caller passes no salt.
+// The zero buffer is sized for the larger of the two KMAC defaults.
+#define SYMCRYPT_SSKDF_KMAC_128_DEFAULT_SALT_SIZE (164)
+#define SYMCRYPT_SSKDF_KMAC_256_DEFAULT_SALT_SIZE (132)
+#define SYMCRYPT_SSKDF_DEFAULT_SALT_MAX SYMCRYPT_SSKDF_KMAC_128_DEFAULT_SALT_SIZE
+
+
+// Customization string handed to the KMAC key expansion.
+static const BYTE pbKmacCustomizationString[3] = { 'K', 'D', 'F' };
+
+//
+// SymCryptSskdfMacExpandSalt
+//
+// Stores macAlgorithm in pExpandedSalt and expands the salt as that algorithm's key.
+// When pbSalt is NULL an all-zero salt is used instead (and cbSalt is ignored); its
+// length is the KMAC default above for KMAC128/KMAC256, and the input block size of
+// the underlying hash for every other MAC.
+// KMAC keys are expanded with the customization string "KDF".
+// Returns the result of the key expansion that was called.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSskdfMacExpandSalt(
+    _Out_ PSYMCRYPT_SSKDF_MAC_EXPANDED_SALT pExpandedSalt,
+    _In_ PCSYMCRYPT_MAC macAlgorithm,
+    _In_reads_opt_(cbSalt) PCBYTE pbSalt,
+    SIZE_T cbSalt)
+{
+    const BYTE abZeroSalt[SYMCRYPT_SSKDF_DEFAULT_SALT_MAX] = { 0 };
+
+    SYMCRYPT_ASSERT( macAlgorithm->expandedKeySize <= sizeof( pExpandedSalt->macKey ) );
+
+    pExpandedSalt->macAlg = macAlgorithm;
+
+    if ( macAlgorithm == SymCryptKmac128Algorithm )
+    {
+        if ( pbSalt == NULL )
+        {
+            pbSalt = abZeroSalt;
+            cbSalt = SYMCRYPT_SSKDF_KMAC_128_DEFAULT_SALT_SIZE;
+        }
+
+        return SymCryptKmac128ExpandKeyEx(
+                    &pExpandedSalt->macKey.kmac128Key,
+                    pbSalt,
+                    cbSalt,
+                    pbKmacCustomizationString,
+                    sizeof( pbKmacCustomizationString ) );
+    }
+
+    if ( macAlgorithm == SymCryptKmac256Algorithm )
+    {
+        if ( pbSalt == NULL )
+        {
+            pbSalt = abZeroSalt;
+            cbSalt = SYMCRYPT_SSKDF_KMAC_256_DEFAULT_SALT_SIZE;
+        }
+
+        return SymCryptKmac256ExpandKeyEx(
+                    &pExpandedSalt->macKey.kmac256Key,
+                    pbSalt,
+                    cbSalt,
+                    pbKmacCustomizationString,
+                    sizeof( pbKmacCustomizationString ) );
+    }
+
+    // Any other MAC: expand through the algorithm's own expandKeyFunc
+    if ( pbSalt == NULL )
+    {
+        pbSalt = abZeroSalt;
+        cbSalt = SymCryptHashInputBlockSize( *(macAlgorithm->ppHashAlgorithm) );
+    }
+
+    return macAlgorithm->expandKeyFunc( &pExpandedSalt->macKey, pbSalt, cbSalt );
+}
+
+//
+// SymCryptSskdfMacDerive
+//
+// Fills pbResult with cbResult bytes made of successive blocks
+//      K(i) = MAC(counter || Z || FixedInfo),      counter = 1, 2, ...
+// where the counter is a 32-bit value stored most significant byte first, Z is
+// (pbSecret, cbSecret) and FixedInfo is (pbInfo, cbInfo).
+//
+// cbMacOutputSize selects the size of each block:
+// - KMAC128/KMAC256, value > 0: each block is cbMacOutputSize bytes.
+// - KMAC128/KMAC256, value 0:   the whole result comes from one KMAC call with
+//                               counter 1.
+// - other MACs:                 must be 0 or equal to the MAC's resultSize; blocks are
+//                               resultSize bytes.
+//
+// Returns SYMCRYPT_INVALID_ARGUMENT when cbMacOutputSize is above 64 or does not fit
+// the rules above, SYMCRYPT_NO_ERROR otherwise.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSskdfMacDerive(
+    _In_ PCSYMCRYPT_SSKDF_MAC_EXPANDED_SALT pExpandedSalt,
+    SIZE_T cbMacOutputSize,
+    _In_reads_(cbSecret) PCBYTE pbSecret,
+    SIZE_T cbSecret,
+    _In_reads_opt_(cbInfo) PCBYTE pbInfo,
+    SIZE_T cbInfo,
+    _Out_writes_(cbResult) PBYTE pbResult,
+    SIZE_T cbResult)
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+    SYMCRYPT_MAC_STATE macState;
+    SYMCRYPT_ALIGN BYTE abBlock[SYMCRYPT_MAC_MAX_RESULT_SIZE];
+    BYTE abCounter[4];
+
+    PCSYMCRYPT_MAC pMac = pExpandedSalt->macAlg;
+    PSYMCRYPT_MAC_RESULT_EX pfnResultEx = NULL;     // non-NULL only for the KMAC variants
+    SIZE_T cbPerBlock = pMac->resultSize;
+    SIZE_T cbChunk;
+    PBYTE pbDst = pbResult;
+    UINT32 uCounter;
+
+    if ( cbMacOutputSize > 64 )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    if ( pMac == SymCryptKmac128Algorithm )
+    {
+        pfnResultEx = SymCryptKmac128ResultEx;
+    }
+    else if ( pMac == SymCryptKmac256Algorithm )
+    {
+        pfnResultEx = SymCryptKmac256ResultEx;
+    }
+
+    if ( pfnResultEx != NULL )
+    {
+        // KMAC output length is variable; the caller picks the block size
+        cbPerBlock = cbMacOutputSize;
+    }
+    else if ( cbMacOutputSize != 0 && cbMacOutputSize != pMac->resultSize )
+    {
+        scError = SYMCRYPT_INVALID_ARGUMENT;
+        goto cleanup;
+    }
+
+    for ( uCounter = 1; cbResult > 0; uCounter++ )
+    {
+        SYMCRYPT_STORE_MSBFIRST32( abCounter, uCounter );
+
+        // K(i) = H(counter || Z || FixedInfo)
+        pMac->initFunc( &macState, &pExpandedSalt->macKey );
+        pMac->appendFunc( &macState, abCounter, sizeof( abCounter ) );
+        pMac->appendFunc( &macState, pbSecret, cbSecret );
+        pMac->appendFunc( &macState, pbInfo, cbInfo );
+
+        if ( pfnResultEx == NULL )
+        {
+            pMac->resultFunc( &macState, abBlock );
+        }
+        else if ( cbMacOutputSize == 0 )
+        {
+            // No block size given: produce the full result in this single call
+            pfnResultEx( &macState, pbResult, cbResult );
+            break;
+        }
+        else
+        {
+            pfnResultEx( &macState, abBlock, cbMacOutputSize );
+        }
+
+        // Copy out the block, truncated if it is the last one
+        cbChunk = SYMCRYPT_MIN( cbResult, cbPerBlock );
+        memcpy( pbDst, abBlock, cbChunk );
+
+        pbDst += cbChunk;
+        cbResult -= cbChunk;
+    }
+
+cleanup:
+
+    // The scratch block held derived output; clear it on every exit path
+    SymCryptWipeKnownSize( &abBlock[0], sizeof( abBlock ) );
+
+    return scError;
+}
+
+//
+// SymCryptSskdfMac
+//
+// One-shot MAC-based form: expands the salt with SymCryptSskdfMacExpandSalt, derives
+// cbResult bytes into pbResult with SymCryptSskdfMacDerive, and wipes the local
+// expanded salt before returning. Returns the first error reported by either step.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSskdfMac(
+    _In_ PCSYMCRYPT_MAC macAlgorithm,
+    SIZE_T cbMacOutputSize,
+    _In_reads_(cbSecret) PCBYTE pbSecret,
+    SIZE_T cbSecret,
+    _In_reads_opt_(cbSalt) PCBYTE pbSalt,
+    SIZE_T cbSalt,
+    _In_reads_opt_(cbInfo) PCBYTE pbInfo,
+    SIZE_T cbInfo,
+    _Out_writes_(cbResult) PBYTE pbResult,
+    SIZE_T cbResult)
+{
+    SYMCRYPT_SSKDF_MAC_EXPANDED_SALT saltKey;
+    SYMCRYPT_ERROR scError;
+
+    scError = SymCryptSskdfMacExpandSalt( &saltKey, macAlgorithm, pbSalt, cbSalt );
+
+    if ( scError == SYMCRYPT_NO_ERROR )
+    {
+        scError = SymCryptSskdfMacDerive(
+                    &saltKey,
+                    cbMacOutputSize,
+                    pbSecret,
+                    cbSecret,
+                    pbInfo,
+                    cbInfo,
+                    pbResult,
+                    cbResult );
+    }
+
+    // The expanded salt is wiped on the failure path as well.
+    SymCryptWipeKnownSize( &saltKey, sizeof( saltKey ) );
+
+    return scError;
+}
+
+//
+// SymCryptSskdfHash
+//
+// Hash-based form. Fills pbResult with cbResult bytes made of successive blocks
+//      K(i) = H(counter || Z || FixedInfo),        counter = 1, 2, ...
+// where the counter is a 32-bit value stored most significant byte first, Z is
+// (pbSecret, cbSecret) and FixedInfo is (pbInfo, cbInfo). The last block is truncated
+// to fit.
+//
+// cbHashOutputSize must be 0 or equal to the hash's resultSize, and never above 64;
+// otherwise SYMCRYPT_INVALID_ARGUMENT is returned and nothing is written.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptSskdfHash(
+    _In_ PCSYMCRYPT_HASH hashAlgorithm,
+    SIZE_T cbHashOutputSize,
+    _In_reads_(cbSecret) PCBYTE pbSecret,
+    SIZE_T cbSecret,
+    _In_reads_opt_(cbInfo) PCBYTE pbInfo,
+    SIZE_T cbInfo,
+    _Out_writes_(cbResult) PBYTE pbResult,
+    SIZE_T cbResult)
+{
+    SYMCRYPT_HASH_STATE hashState;
+    BYTE abCounter[4];
+
+    SIZE_T cbDigest = hashAlgorithm->resultSize;
+    SIZE_T cbChunk;
+    PBYTE pbDst = pbResult;
+    UINT32 uCounter;
+
+    if ( cbHashOutputSize > 64 )
+    {
+        return SYMCRYPT_INVALID_ARGUMENT;
+    }
+
+    if ( cbHashOutputSize != 0 && cbHashOutputSize != hashAlgorithm->resultSize )
+    {
+        return SYMCRYPT_INVALID_ARGUMENT;
+    }
+
+    for ( uCounter = 1; cbResult > 0; uCounter++ )
+    {
+        SYMCRYPT_STORE_MSBFIRST32( abCounter, uCounter );
+
+        cbChunk = SYMCRYPT_MIN( cbResult, cbDigest );
+
+        // K(i) = H(counter || Z || FixedInfo), written straight into the output
+        SymCryptHashInit( hashAlgorithm, &hashState );
+        SymCryptHashAppend( hashAlgorithm, &hashState, abCounter, sizeof( abCounter ) );
+        SymCryptHashAppend( hashAlgorithm, &hashState, pbSecret, cbSecret );
+        SymCryptHashAppend( hashAlgorithm, &hashState, pbInfo, cbInfo );
+        SymCryptHashResult( hashAlgorithm, &hashState, pbDst, cbChunk );
+
+        pbDst += cbChunk;
+        cbResult -= cbChunk;
+    }
+
+    return SYMCRYPT_NO_ERROR;
+}
diff --git a/libs/symcrypt/lib/tlsCbcVerify.c b/libs/symcrypt/lib/tlsCbcVerify.c
new file mode 100644
index 00000000000..26a1a173a07
--- /dev/null
+++ b/libs/symcrypt/lib/tlsCbcVerify.c
@@ -0,0 +1,458 @@
+//
+// tlsCbcVerify.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#include "precomp.h"
+
+//
+// This code needs to process data in words, and we'd like to use 32-bit words on 32-bit
+// architectures and 64-bit words on 64-bit architectures. So we use NATIVE_UINT & friends.
+//
+// We don't want to use 64-bit words on 32-bit architectures because the 64-bit shift/rotate
+// code might not be constant-time, and it puts further register pressure on the x86 that can only
+// use 6 registers in C code.
+//
+
+#if NATIVE_BYTES == 8
+#define NATIVE_01 (0x0101010101010101)
+#elif NATIVE_BYTES == 4
+#define NATIVE_01 (0x01010101)
+#else
+#error Unexpected NATIVE_BYTES value
+#endif
+
+//
+// MASK32 macros return UINT32 values based on conditions of the inputs
+//
+
+// MASK32_LT returns a UINT32 that is -1 if _a < _b, 0 otherwise.
+#define MASK32_LT( _a, _b ) ((UINT32)( ( (INT32)((_a)-(_b)) ) >> 31 ) )
+
+// MASK32_EQ returns a UINT32 that is -1 if _a == _b, 0 otherwise.
+#define MASK32_EQ( _a, _b ) (~(UINT32)(-(INT32)((_a) ^ (_b)) >> 31))
+
+
+//
+// Native Byte mask generation is done in inlined functions, as that makes them much more readable
+// These are mask values that are computed per byte in the word.
+//
+
+// Relevant bits to look at when determining whether an index is in the word
+// Difference of index & word start must be in 0..NATIVE_BYTES - 1
+// This mask defines the relevant bits we look at.
+// We avoid using the highest bits as we use fact that the result after the mask
+// is positive. This works as all our positions are < 2^16.
+#define MASKNB_INWORD_RELEVANTBITS (~(NATIVE_BYTES - 1) & 0x0fffffff) + +#define MASKNB_BROADCAST( _b ) ((NATIVE_UINT)(_b) * NATIVE_01) + +FORCEINLINE +NATIVE_UINT +SymCryptNMaskGe( UINT32 wordStart, UINT32 boundary ) +// Return a word starting at byte wordStart from an array with a[i] = 0xff if i>=boundary, 0 otherwise +{ + INT32 diff32; + NATIVE_INT anySet; + UINT32 shift; + + // Mask that is -1 if boundary < wordStart + 8 + anySet = ((NATIVE_INT) boundary - (NATIVE_INT) wordStart - NATIVE_BYTES) >> (NATIVE_BITS - 1); + + // Compute the index of boundary into the word, possibly negative + diff32 = (INT32)boundary - (INT32)wordStart; + // Compute the necessary shift when the result will be partially set + shift = 8 * (diff32 & (NATIVE_BYTES - 1)); + + // Mask the shift to 0 if the word is to be all set as boundary < wordStart + shift &= (INT32)~diff32 >> 31; + + return (NATIVE_UINT) anySet << shift; +} + +FORCEINLINE +NATIVE_UINT +SymCryptNMaskEq( UINT32 wordStart, UINT32 boundary ) +// Return a word starting at byte wordStart from an array with a[i] = (i == boundary) ? 0xff : 0 +{ + INT32 diff32; + NATIVE_UINT inWord; + + // 32-bit signed difference + diff32 = (INT32)boundary - (INT32)wordStart; + + // inWord = (-1) if boundary is within the word, 0 otherwise + // Cast to NATIVE_UINT is free on AMD64, as is subsequent cast to NATIVE_INT + // A direct cast from INT32 to NATIVE_INT requires a sign extension instruction, so this is faster. + inWord = ~ ((-(NATIVE_INT)(NATIVE_UINT)(diff32 & MASKNB_INWORD_RELEVANTBITS)) >> (NATIVE_BITS -1)); + + return inWord & ((NATIVE_UINT)0xff << 8 * (diff32 & (NATIVE_BYTES - 1)) ); +} + +FORCEINLINE +NATIVE_UINT +SymCryptNMaskEq80( UINT32 wordStart, UINT32 boundary ) +// Same as SymcryptNMaskEq except the 0xff is replaced with 0x80. 
+{ + INT32 diff32; + NATIVE_UINT inWord; + + // 32-bit signed difference + diff32 = (INT32)boundary - (INT32)wordStart; + + // inWord = (-1) if boundary is within the word, 0 otherwise + inWord = ~ ((-(NATIVE_INT)(NATIVE_UINT)(diff32 & MASKNB_INWORD_RELEVANTBITS)) >> (NATIVE_BITS -1)); + + return inWord & ((NATIVE_UINT)0x80 << 8 * (diff32 & (NATIVE_BYTES - 1)) ); +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsCbcHmacVerifyCore( + _In_ PCSYMCRYPT_HASH pHash, + _Inout_ PSYMCRYPT_COMMON_HASH_STATE pState, + _In_reads_bytes_(cbData) PCBYTE pbData, + SIZE_T cbData, + _Inout_updates_( pHash->inputBlockSize / 2) PBYTE pbMacValue, + _Inout_updates_( pHash->resultSize ) PBYTE pbHashResult, + _Out_ PUINT32 pu32PaddingError ) +// +// The core of the constant-time TLS record validation. +// This appends the data part of the record to the hash state, returns the intermediate hash value, +// and extracts the MAC value out of the record and returns it. +// +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + UINT32 cbPad; + UINT32 maxPadLength; + UINT32 u32; + UINT32 i; + UINT32 iPaddingStart; // using 'i' for index + UINT32 iMacStart; + NATIVE_UINT mInData; + NATIVE_UINT mInMac; + NATIVE_UINT mInPadding; + NATIVE_UINT m; + NATIVE_UINT nPaddingError = 0; // nonzero if a padding byte value is wrong. + UINT32 next; + UINT32 cbHashPrefix; + UINT32 cbExtendedData; + UINT32 totalBytesHashed; + UINT32 hashPaddingFinal; + UINT32 resultHashBlockIndex; + UINT32 lastHashBlockIndex; + NATIVE_UINT w; + NATIVE_UINT data; + UINT32 backOffset; + NATIVE_UINT * bufferLocation; + NATIVE_UINT padBytes; + UINT32 m32ResultBlock; + NATIVE_UINT mResultBlock; + SIZE_T tmp; + const UINT32 cbMacValue = pHash->inputBlockSize / 2; + + SYMCRYPT_ASSERT( cbMacValue == SymCryptRoundUpPow2Sizet(pHash->resultSize) ); + + // Process all the data up to the part where the MAC value might appear + // The if() is safe as both cbData and u32 are public values. 
+ u32 = pHash->resultSize + 256; + if( cbData > u32 ) + { + (*pHash->appendFunc)(pState, pbData, cbData-u32 ); + pbData += cbData - u32; + cbData = u32; + } + + // Check that we have enough data for a valid record. + // We need one MAC value plus one padding_length byte + if (cbData < pHash->resultSize + 1) + { + scError = SYMCRYPT_BUFFER_TOO_SMALL; + goto cleanup; + } + + // We OR our results into the result buffers, so we must init them to zero + SymCryptWipe( pbMacValue, cbMacValue ); + SymCryptWipe( pbHashResult, pHash->resultSize ); + + // Pick up the padding_length. Note that this is the value we have to keep secret from + // side-channel attacks. + cbPad = pbData[cbData - 1]; + + // We reduce cbData so that the padding_length byte is no longer under consideration. + cbData -= 1; + + // Bound the padding length to cbData - mac_length + // This doesn't reveal data as we treat all cbPad values the same, but it makes our + // further computations easier + maxPadLength = (UINT32)cbData - pHash->resultSize; // We checked this is >= 0 + u32 = MASK32_LT( maxPadLength, cbPad ); // mask: maxPadLength < cbPad + cbPad = cbPad + ((maxPadLength - cbPad) & u32); + nPaddingError |= u32; // mark as padding error + + // From here on out we maintain indices into a conceptual extended buffer with length cbExtendedData, + // and index 0 at the start of the hash computation. + // This aligns us with the hash input block, and simplifies hash padding computations and word + // accesses. + // However, we must always subtract cbHashPrefix from indices before using them to access bytes + // in pbData, as the HashPrefix was already hashed into the MAC state. 
+ + cbHashPrefix = (UINT32)pState->dataLengthL; // # bytes already hashed into the MAC state + cbExtendedData = (UINT32)cbData + cbHashPrefix; // total # bytes that the conceptual extended buffer has + next = cbHashPrefix; // next index we will consider for processing (start of pbData) + + totalBytesHashed = cbExtendedData - pHash->resultSize - cbPad; + SYMCRYPT_STORE_MSBFIRST32( &hashPaddingFinal, totalBytesHashed * 8 ); // Length padding for result hash block + + // We need to figure out what the index is of the last hash input block in the computation + // (including any phantom blocks after the actual hash is done) and the index of the result + // hash block of the actual hash computation. + // We've limited the max input for simplicity (everything fits in 32 bits) + // We also avoid 64-bit operations as their implementation on 32-bit architectures might not + // always be constant-time. + // This computation works for SHA-1, SHA-256, and SHA-384 which is all we care about + // We avoid using % as the runtime isn't constant and our inputs are secret. + + // First the actual # bytes in the real and phantom computation + resultHashBlockIndex = totalBytesHashed + 1 + pHash->inputBlockSize / 8; // 1 byte 0x80 + length padding in last block + lastHashBlockIndex = resultHashBlockIndex + cbPad; // The furthest any hash could go + + // round up to a whole # blocks + resultHashBlockIndex = (resultHashBlockIndex + pHash->inputBlockSize - 1) & ~(pHash->inputBlockSize - 1); + lastHashBlockIndex = (lastHashBlockIndex + pHash->inputBlockSize - 1) & ~(pHash->inputBlockSize - 1); + + // Compute the indices where the MAC and padding start + iPaddingStart = cbExtendedData - cbPad; + iMacStart = iPaddingStart - pHash->resultSize; + + SYMCRYPT_ASSERT( iMacStart < cbExtendedData ); // Fail if the last computation underflowed. 
+ + // Align our handling to the native word size so that we can safely use native words + if( (next & (NATIVE_BYTES - 1)) != 0 ) + { + backOffset = next & (NATIVE_BYTES - 1); + + // Process a partial word + SYMCRYPT_ASSERT( ( (next ^ pState->bytesInBuffer) & (NATIVE_BYTES - 1) ) == 0 ); + + // Read a word; as the MAC value is > 8 bytes this won't overflow the buffer + w = *(NATIVE_UINT *) &pbData[0]; + m = SymCryptNMaskGe( next, iMacStart ); + mInData = ~m; + mInMac = m; + + data = w & mInData; + data |= SymCryptNMaskEq80( next, iMacStart ); // add 0x80 byte @ iMacStart + + // Now we put the data into the hash buffer + bufferLocation = (NATIVE_UINT *)&pState->buffer[ pState->bytesInBuffer - backOffset ]; + *bufferLocation = (*bufferLocation & (((NATIVE_UINT)1 << 8*backOffset) - 1)) | (data << 8*backOffset); + pState->bytesInBuffer += NATIVE_BYTES - backOffset; + + // And the MAC data in the mac buffer + *(NATIVE_UINT *)&pbMacValue[(next - backOffset) & (cbMacValue - 1)] |= (w & mInMac) << 8*backOffset; + + if( pState->bytesInBuffer == pHash->inputBlockSize ) + { + // Block is full. This can't be the result block as we didn't have room for the padding yet. 
+ (*pHash->appendBlockFunc)( (PBYTE)pState + pHash->chainOffset, &pState->buffer[0], pHash->inputBlockSize, &tmp ); + pState->bytesInBuffer = 0; + } + + next += NATIVE_BYTES - backOffset; + } + + padBytes = MASKNB_BROADCAST( cbPad ); + + // Now we can loop over the data in whole words + while( next <= cbExtendedData - NATIVE_BYTES ) + { + w = *(NATIVE_UINT *) &pbData[next - cbHashPrefix]; + + m = SymCryptNMaskGe( next, iMacStart ); + mInMac = m; + mInData = ~m; + + m = SymCryptNMaskGe( next, iPaddingStart ); + mInPadding = m; + mInMac &= ~m; + + data = w & mInData; + data |= SymCryptNMaskEq80( next, iMacStart ); // add 0x80 byte @ iMacStart + + *(NATIVE_UINT *)(&pState->buffer[ pState->bytesInBuffer ]) = data; + pState->bytesInBuffer += NATIVE_BYTES; + + if (pState->bytesInBuffer == pHash->inputBlockSize) + { + // Insert the length component of the hash padding (only in result block) + m32ResultBlock = MASK32_EQ( next, resultHashBlockIndex - NATIVE_BYTES ); + *(UINT32*) &pState->buffer[ pHash->inputBlockSize - 4 ] |= hashPaddingFinal & m32ResultBlock; + + (*pHash->appendBlockFunc)( (PBYTE)pState + pHash->chainOffset, &pState->buffer[0], pHash->inputBlockSize, &tmp ); + SYMCRYPT_ASSERT( tmp == 0 ); + + mResultBlock = (NATIVE_UINT)(NATIVE_INT)(INT32) m32ResultBlock; // Convert 32-bit mask to native mask + + // Masked copy of result to result buffer + // We do whole words, and then an optional UINT32 to handle the 20-byte SHA-1 result on AMD64. + // The for() and if() are side-channel safe as the resultSize and NATIVE_BYTES values are public. 
+ for ( i = 0; i < pHash->resultSize / NATIVE_BYTES; i++) + { + ((NATIVE_UINT *)pbHashResult)[i] |= ((NATIVE_UINT *)((PBYTE)pState + pHash->chainOffset))[i] & mResultBlock; + } + if( (pHash->resultSize & (NATIVE_BYTES - 1)) != 0 ) + { + *(UINT32 *) (&pbHashResult[ pHash->resultSize - 4 ]) |= *(UINT32 *) ((PBYTE) pState + pHash->chainOffset + pHash->resultSize - 4) & (UINT32) mResultBlock; + } + pState->bytesInBuffer = 0; + } + + *(NATIVE_UINT *)&pbMacValue[next & (cbMacValue - 1)] |= w & mInMac; + + nPaddingError |= (w ^ padBytes) & mInPadding; + + next += NATIVE_BYTES; + } + + if( next < cbExtendedData ) + { + // Process the remaining bytes. This can't be data so we only do the MAC and padding... + // The main difference is that we read the last full word in pbData and then align it + // as if we read the next word starting at pbData[next - cbHashPrefix] + w = *(NATIVE_UINT *) &pbData[ cbData - NATIVE_BYTES ]; // last word + w >>= 8 * (next - cbExtendedData + NATIVE_BYTES ); // Shift to right location + padBytes >>= 8 * (next - cbExtendedData + NATIVE_BYTES ); // Zero padBytes that are never read + + m = SymCryptNMaskGe( next, iPaddingStart ); + mInPadding = m; + mInMac = ~m; + + *(NATIVE_UINT *)&pbMacValue[next & (cbMacValue - 1)] |= w & mInMac; + + nPaddingError |= (w ^ padBytes) & mInPadding; + next = cbExtendedData; + } + + // At this point we still have to potentially do one more hash block. + // The data is all copied into the hash input buffer, as is the 0x80 padding byte. + + if (next < lastHashBlockIndex) + { + // there is still one more hash block to compute. This could either be the actual last block of the hash + // computation, or a phantom block for side-channel hiding. + // This IF depends only on the cbData, the # bytes hashed before this final pbData buffer, and the hash algorithm + // properties. + // We never need to compute more than 1 additional hash block as we are at least pHash->resultSize bytes beyond the + // actual data. 
+ SymCryptWipe( &pState->buffer[ pState->bytesInBuffer], pHash->inputBlockSize - pState->bytesInBuffer ); + + // Just put in the padding, no need to mask this + *(UINT32*) &pState->buffer[ pHash->inputBlockSize - 4 ] = hashPaddingFinal; + + (*pHash->appendBlockFunc)( (PBYTE)pState + pHash->chainOffset, &pState->buffer[0], pHash->inputBlockSize, &tmp ); + SYMCRYPT_ASSERT( tmp == 0 ); + + // Masked copy of the result + mResultBlock = (NATIVE_UINT)(NATIVE_INT)(INT32) MASK32_EQ( lastHashBlockIndex, resultHashBlockIndex ); + + // Masked copy of result to result buffer + // We do whole words, and then an optional UINT32 to handle the 20-byte SHA-1 result on AMD64. + for ( i = 0; i < pHash->resultSize / NATIVE_BYTES; i++) + { + ((NATIVE_UINT *)pbHashResult)[i] |= ((NATIVE_UINT *)((PBYTE)pState + pHash->chainOffset))[i] & mResultBlock; + } + if( (pHash->resultSize & (NATIVE_BYTES - 1)) != 0 ) + { + *(UINT32 *) (&pbHashResult[ pHash->resultSize - 4 ]) |= *(UINT32 *) ((PBYTE) pState + pHash->chainOffset + pHash->resultSize - 4) & (UINT32) mResultBlock; + } + pState->bytesInBuffer = 0; + } + + // Now we have the hash result, and the Mac value buffer is filled with a rotated copy of the Mac value. + // We have to un-rotate the Mac value. + + // Check that we have the right hash result + //for( SIZE_T t=0; t < cbMacValue; t++ ) + //{ + // SYMCRYPT_ASSERT( pbMacValue[ (iMacStart + t) & (cbMacValue - 1 ) ] == (t >= pHash->resultSize ? 0 : pbData[iMacStart - cbHashPrefix + t] )); + //} + + SymCryptScsRotateBuffer( pbMacValue, cbMacValue, iMacStart & (cbMacValue - 1) ); + + //for( SIZE_T t=0; t < cbMacValue; t++ ) + //{ + // SYMCRYPT_ASSERT( pbMacValue[ t ] == (t >= pHash->resultSize ? 
0 : pbData[iMacStart - cbHashPrefix + t] )); + //} + +cleanup: + + nPaddingError |= nPaddingError >> (NATIVE_BITS/2); // Map possibly 64 bits down to 32 + + *pu32PaddingError = (UINT32) nPaddingError; + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptTlsCbcHmacVerify( + _In_ PCSYMCRYPT_MAC pMacAlgorithm, + _In_ PVOID pExpandedKey, + _Inout_ PVOID pState, + _In_reads_(cbData) PCBYTE pbData, + SIZE_T cbData) +{ + BYTE abMacValue[64]; + BYTE abHashResult[48]; + UINT32 u32PaddingError; + PSYMCRYPT_COMMON_HASH_STATE pHashState = (PSYMCRYPT_COMMON_HASH_STATE) pState; + PCSYMCRYPT_HASH pHashAlgorithm = *(pMacAlgorithm->ppHashAlgorithm); + UINT32 i; + + SYMCRYPT_ASSERT(pMacAlgorithm == SymCryptHmacSha1Algorithm || + pMacAlgorithm == SymCryptHmacSha256Algorithm || + pMacAlgorithm == SymCryptHmacSha384Algorithm ); + SYMCRYPT_ASSERT(((*(pMacAlgorithm->ppHashAlgorithm))->inputBlockSize)/2 <= 64); + SYMCRYPT_ASSERT((*(pMacAlgorithm->ppHashAlgorithm))->resultSize <= 48); + + SymCryptTlsCbcHmacVerifyCore( + pHashAlgorithm, + pHashState, + pbData, + cbData, + abMacValue, + abHashResult, + &u32PaddingError ); + + // We have the hash value, convert it to a MAC value + // First we set up the chaining value + memcpy( ((PBYTE)pHashState + pHashAlgorithm->chainOffset), + (PBYTE)pExpandedKey + pMacAlgorithm->outerChainingStateOffset, + pHashAlgorithm->chainSize ); + // Then copy the data & set the length + // The hash result wasn't BSWAPPED yet... 
+ if( pMacAlgorithm->resultSize <= 32 ) + { + SymCryptUint32ToMsbFirst( (UINT32 *) abHashResult, pHashState->buffer, pHashAlgorithm->resultSize / 4 ); + } else { + SymCryptUint64ToMsbFirst( (UINT64 *) abHashResult, pHashState->buffer, pHashAlgorithm->resultSize / 8 ); + } + pHashState->bytesInBuffer = pHashAlgorithm->resultSize; + pHashState->dataLengthL = pHashAlgorithm->resultSize + pHashAlgorithm->inputBlockSize; + + (*pHashAlgorithm->resultFunc)( pHashState, abHashResult ); + + // Verify in 32-bit chunks to support SHA-1 without further problems + for( i=0; i<pHashAlgorithm->resultSize / 4; i++ ) + { + u32PaddingError |= *(PUINT32)&abHashResult[4*i] ^ *(PUINT32)&abMacValue[4*i]; + } + + // We may reveal the final error-or-no-error as that will be visible anyway + return u32PaddingError == 0 ? SYMCRYPT_NO_ERROR : SYMCRYPT_AUTHENTICATION_FAILURE; +} diff --git a/libs/symcrypt/lib/tlsprf.c b/libs/symcrypt/lib/tlsprf.c new file mode 100644 index 00000000000..4a3167ec65e --- /dev/null +++ b/libs/symcrypt/lib/tlsprf.c @@ -0,0 +1,569 @@ +// +// tlsprf.c +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +// +// This module contains the routines to implement the two PRF +// functions for the TLS protocols 1.1 and 1.2. These are used in +// the protocol's key derivation function. +// +// + +#include "precomp.h" + +// +// TLS PRF Constants +// +#define SYMCRYPT_TLS_MAX_LABEL_AND_SEED_SIZE (SYMCRYPT_TLS_MAX_LABEL_SIZE + SYMCRYPT_TLS_MAX_SEED_SIZE) + +// This **MUST** be a common multiple of MD5 +// output size and SHA1 output size. +#define SYMCRYPT_TLS_1_1_CHUNK_SIZE 80 + +// +// SymCryptTlsPrf1_1ExpandKey is the key expansion function for versions 1.0 +// and 1.1 of the TLS protocol. It takes as inputs a pointer to the expanded TLSPRF1.1 +// key, and the key material in pbKey. 
Regarding the treatment of the key +// material (the "secret"), the following is defined in RFCs 2246 and 4346: +// +// TLS's PRF is created by splitting the secret into two halves and +// using one half to generate data with P_MD5 and the other half to +// generate data with P_SHA-1, then exclusive-or'ing the outputs of +// these two expansion functions together. +// +// S1 and S2 are the two halves of the secret and each is the same +// length. S1 is taken from the first half of the secret, S2 from the +// second half. Their length is created by rounding up the length of the +// overall secret divided by two; thus, if the original secret is an odd +// number of bytes long, the last byte of S1 will be the same as the +// first byte of S2. +// +// L_S = length in bytes of secret; +// L_S1 = L_S2 = ceil(L_S / 2); +// +// The secret is partitioned into two halves (with the possibility of +// one shared byte) as described above, S1 taking the first L_S1 bytes +// and S2 the last L_S2 bytes. +// +// Note: In pre-RS1 Windows if the length of the key material of each half +// exceeded HMAC_K_PADSIZE = 64, we truncated the key. This does not comply +// with RFC 2104 (HMAC). However, as of April 2016 several cipher suites +// used keys (pre-master secret) longer than 128 bytes. To achieve interop +// with servers complying with the RFC we use the entire key for the HMAC calculation. 
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptTlsPrf1_1ExpandKey(
+    _Out_               PSYMCRYPT_TLSPRF1_1_EXPANDED_KEY    pExpandedKey,
+    _In_reads_(cbKey)   PCBYTE                              pbKey,
+                        SIZE_T                              cbKey)
+{
+    // S1 = pbKey[ 0 .. cbHalf ) and S2 = pbKey[ cbS2Offset .. cbKey ), with
+    // cbS2Offset = floor(cbKey / 2) and cbHalf = ceil(cbKey / 2).
+    const SIZE_T cbS2Offset = cbKey / 2;
+    const SIZE_T cbHalf = cbS2Offset + (cbKey % 2);
+    SYMCRYPT_ERROR scError;
+
+    //
+    // Layout of the key material.
+    //
+    // Even cbKey (cbHalf == cbS2Offset): the halves are disjoint.
+    //
+    //      ********************************************
+    //      <--------- S1 -------><--------- S2 ------->
+    //
+    // Odd cbKey (cbHalf == cbS2Offset + 1): the middle byte ($) is both the
+    // last byte of S1 and the first byte of S2.
+    //
+    //      **********************$**********************
+    //      <--------- S1 -------->
+    //                            <--------- S2 -------->
+    //
+    // For an odd key length the middle input byte is therefore read twice,
+    // which breaks the usual rule that input data is read only once. That is
+    // accepted here because:
+    //  - Avoiding the second read would mean buffering an arbitrary-size
+    //    input, and SymCrypt avoids memory allocations for symmetric
+    //    algorithms.
+    //  - Dual reads are a problem when the memory is double-mapped into a
+    //    different (less trusted) security context, e.g. a kernel-mode
+    //    operation on memory also mapped into a user address space. This PRF
+    //    is used by TLS in LSA where that situation does not occur.
+    //  - This code serves TLS 1.0 and TLS 1.1, both of which are on the
+    //    deprecation path.
+    //  - In a dual-read attack the attacker typically supplies the input and
+    //    changes it while it is being accessed. An attacker who supplies the
+    //    input could just as well supply an even-length key and freely choose
+    //    both HMAC keys, so there is nothing to gain from the dual read.
+    //  - Even if the dual-read problem were to occur, it does not seem to
+    //    help an attacker in any way.
+    //
+
+    // HMAC-MD5 key from S1
+    scError = SymCryptHmacMd5ExpandKey( &pExpandedKey->macMd5Key, pbKey, cbHalf );
+    if( scError == SYMCRYPT_NO_ERROR )
+    {
+        // HMAC-SHA1 key from S2
+        scError = SymCryptHmacSha1ExpandKey( &pExpandedKey->macSha1Key, pbKey + cbS2Offset, cbHalf );
+        if( scError != SYMCRYPT_NO_ERROR )
+        {
+            // Do not leave a half-initialized expanded key behind
+            SymCryptWipeKnownSize( &pExpandedKey->macMd5Key, sizeof(pExpandedKey->macMd5Key) );
+        }
+    }
+
+    return scError;
+}
+
+//
+// SymCryptTlsPrf1_2ExpandKey records the MAC algorithm in the expanded key
+// and lets that algorithm's expandKeyFunc expand pbKey into macKey.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptTlsPrf1_2ExpandKey(
+    _Out_               PSYMCRYPT_TLSPRF1_2_EXPANDED_KEY    pExpandedKey,
+    _In_                PCSYMCRYPT_MAC                      macAlgorithm,
+    _In_reads_(cbKey)   PCBYTE                              pbKey,
+                        SIZE_T                              cbKey )
+{
+    SYMCRYPT_ERROR scError;
+
+    // The generic macKey storage must be able to hold this algorithm's expanded key
+    SYMCRYPT_ASSERT( macAlgorithm->expandedKeySize <= sizeof( pExpandedKey->macKey ) );
+
+    pExpandedKey->macAlg = macAlgorithm;
+    scError = (*macAlgorithm->expandKeyFunc)( &pExpandedKey->macKey, pbKey, cbKey );
+
+    return scError;
+}
+
+//
+// SymCryptTlsPrfMac uses the expanded key and hashes the concatenated
+// inputs pbAi and pbSeed. It is used by all the TLS versions per
+// RFCs 2246, 4346, and 5246.
+// Remark:
+//      - cbSeed can be 0 and pbSeed NULL.
+//      - pbResult should be of size at least pMacAlgorithm->resultSize
+//
+
+VOID
+SYMCRYPT_CALL
+SymCryptTlsPrfMac(
+    _In_                    PCSYMCRYPT_MAC                  pMacAlgorithm,
+    _In_                    PCSYMCRYPT_MAC_EXPANDED_KEY     pMacExpandedKey,
+    _In_reads_(cbAi)        PCBYTE                          pbAi,
+    _In_                    SIZE_T                          cbAi,
+    _In_reads_opt_(cbSeed)  PCBYTE                          pbSeed,
+    _In_                    SIZE_T                          cbSeed,
+    _Out_                   PBYTE                           pbResult)
+{
+    SYMCRYPT_MAC_STATE state;
+
+    (*pMacAlgorithm->initFunc)( &state, pMacExpandedKey );
+
+    // The A(i) part always goes in first; the seed part is optional
+    (*pMacAlgorithm->appendFunc)( &state, pbAi, cbAi );
+    if( cbSeed != 0 )
+    {
+        (*pMacAlgorithm->appendFunc)( &state, pbSeed, cbSeed );
+    }
+
+    // The resultFunc wipes the state, so no explicit wipe is needed here
+    (*pMacAlgorithm->resultFunc)( &state, pbResult );
+}
+
+//
+// SymCryptTlsPrfPHash is defined in RFCs 2246, 4346,
+// and 5246 as follows:
+//
+// First, we define a data expansion function, P_hash(secret, data)
+// which uses a single hash function to expand a secret and seed into
+// an arbitrary quantity of output:
+//
+//      P_hash(secret, seed) = HMAC_hash(secret, A(1) + seed) +
+//                             HMAC_hash(secret, A(2) + seed) +
+//                             HMAC_hash(secret, A(3) + seed) + ...
+//
+// Where + indicates concatenation.
+// A() is defined as:
+//      A(0) = seed
+//      A(i) = HMAC_hash(secret, A(i-1))
+//
+
+VOID
+SYMCRYPT_CALL
+SymCryptTlsPrfPHash(
+    _In_                    PCSYMCRYPT_MAC                  pMacAlgorithm,
+    _In_                    PCSYMCRYPT_MAC_EXPANDED_KEY     pMacExpandedKey,
+    _In_reads_(cbSeed)      PCBYTE                          pbSeed,
+    _In_                    SIZE_T                          cbSeed,
+    _In_reads_opt_(cbAiIn)  PCBYTE                          pbAiIn,     // Buffer for the previous Ai (used in 1.1)
+    _In_                    SIZE_T                          cbAiIn,
+    _Out_writes_(cbResult)  PBYTE                           pbResult,
+                            SIZE_T                          cbResult,
+    _Out_writes_opt_(cbAiOut) PBYTE                         pbAiOut,    // Buffer for the next Ai (only with AiIn)
+                            SIZE_T                          cbAiOut)
+{
+    SYMCRYPT_ALIGN BYTE rbAi[SYMCRYPT_MAC_MAX_RESULT_SIZE];     // current A(i)
+    SYMCRYPT_ALIGN BYTE rbBlock[SYMCRYPT_MAC_MAX_RESULT_SIZE];  // HMAC( secret, A(i) + seed )
+
+    const SIZE_T cbMac = pMacAlgorithm->resultSize;
+    PBYTE pbOut = pbResult;
+    SIZE_T cbRemaining = cbResult;
+    SIZE_T cbChunk;
+
+    if( cbAiIn != 0 )
+    {
+        // Resume from the A(i) handed in by the caller
+        memcpy( rbAi, pbAiIn, SYMCRYPT_MIN( SYMCRYPT_MAC_MAX_RESULT_SIZE, cbAiIn ) );
+    }
+    else
+    {
+        // Fresh start: A(1) = HMAC( secret, A(0) ) with A(0) = seed
+        SymCryptTlsPrfMac(
+            pMacAlgorithm,
+            pMacExpandedKey,
+            pbSeed,
+            cbSeed,
+            NULL,                   // no "seed" part when computing an A(i)
+            0,
+            rbAi );
+    }
+
+    while( cbRemaining > 0 )
+    {
+        // The final output block may be truncated
+        cbChunk = SYMCRYPT_MIN( cbRemaining, cbMac );
+
+        // HMAC( secret, A(i) + seed ) is the next block of output
+        SymCryptTlsPrfMac(
+            pMacAlgorithm,
+            pMacExpandedKey,
+            rbAi,
+            cbMac,
+            pbSeed,
+            cbSeed,
+            rbBlock );
+        memcpy( pbOut, rbBlock, cbChunk );
+
+        // Advance to A(i+1). This is done after the last block too, so that
+        // the value handed back through pbAiOut is the one the next call needs.
+        SymCryptTlsPrfMac(
+            pMacAlgorithm,
+            pMacExpandedKey,
+            rbAi,
+            cbMac,
+            NULL,                   // no "seed" part when computing an A(i)
+            0,
+            rbAi );
+
+        pbOut += cbChunk;
+        cbRemaining -= cbChunk;
+    }
+
+    // Hand the next A(i) back to the caller if requested
+    if( cbAiOut > 0 )
+    {
+        memcpy( pbAiOut, rbAi, SYMCRYPT_MIN( cbAiOut, cbMac ) );
+    }
+
+    SymCryptWipeKnownSize( rbAi, sizeof(rbAi) );
+    SymCryptWipeKnownSize( rbBlock, sizeof(rbBlock) );
+}
+
+
+//
+// The following PRF is defined in RFC 2246 and 4346:
+//
+// The PRF is then defined as the result of mixing the two pseudorandom
+// streams by exclusive-or'ing them together.
+//
+//      PRF(secret, label, seed) = P_MD5(S1, label + seed) XOR
+//                                 P_SHA-1(S2, label + seed);
+//
+// Remark: The two P_hash computations are done in parallel, one chunk of
+//         SYMCRYPT_TLS_1_1_CHUNK_SIZE bytes at a time.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptTlsPrf1_1Derive(
+    _In_                    PCSYMCRYPT_TLSPRF1_1_EXPANDED_KEY   pExpandedKey,
+    _In_reads_opt_(cbLabel) PCBYTE                              pbLabel,
+    _In_                    SIZE_T                              cbLabel,
+    _In_reads_(cbSeed)      PCBYTE                              pbSeed,
+    _In_                    SIZE_T                              cbSeed,
+    _Out_writes_(cbResult)  PBYTE                               pbResult,
+                            SIZE_T                              cbResult)
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    SYMCRYPT_ALIGN BYTE rbLabelAndSeed[SYMCRYPT_TLS_MAX_LABEL_AND_SEED_SIZE];
+    SIZE_T cbLabelAndSeed;
+
+    // Running A(i) and per-chunk output of the P_MD5 stream
+    SYMCRYPT_ALIGN BYTE rbAiMd5[SYMCRYPT_HMAC_MD5_RESULT_SIZE];
+    SYMCRYPT_ALIGN BYTE rbChunkMd5[SYMCRYPT_TLS_1_1_CHUNK_SIZE];
+
+    // Running A(i) and per-chunk output of the P_SHA-1 stream
+    SYMCRYPT_ALIGN BYTE rbAiSha1[SYMCRYPT_HMAC_SHA1_RESULT_SIZE];
+    SYMCRYPT_ALIGN BYTE rbChunkSha1[SYMCRYPT_TLS_1_1_CHUNK_SIZE];
+
+    PCSYMCRYPT_MAC_EXPANDED_KEY pMd5Key = (PCSYMCRYPT_MAC_EXPANDED_KEY)&pExpandedKey->macMd5Key;
+    PCSYMCRYPT_MAC_EXPANDED_KEY pSha1Key = (PCSYMCRYPT_MAC_EXPANDED_KEY)&pExpandedKey->macSha1Key;
+
+    PBYTE pbOut = pbResult;
+    SIZE_T cbRemaining = cbResult;
+    SIZE_T cbChunk;
+
+    // Size checks
+    if( (cbLabel > SYMCRYPT_TLS_MAX_LABEL_SIZE) || (cbSeed > SYMCRYPT_TLS_MAX_SEED_SIZE) )
+    {
+        scError = SYMCRYPT_WRONG_DATA_SIZE;
+        goto cleanup;
+    }
+
+    // rbLabelAndSeed = label + seed (the label is optional)
+    if( cbLabel != 0 )
+    {
+        memcpy( rbLabelAndSeed, pbLabel, cbLabel );
+    }
+    memcpy( rbLabelAndSeed + cbLabel, pbSeed, cbSeed );
+    cbLabelAndSeed = cbLabel + cbSeed;
+
+    // A(1) for each stream; A(0) is label + seed
+    SymCryptTlsPrfMac(
+        SymCryptHmacMd5Algorithm,
+        pMd5Key,
+        rbLabelAndSeed,
+        cbLabelAndSeed,
+        NULL,                       // no "seed" part when computing an A(i)
+        0,
+        rbAiMd5 );
+
+    SymCryptTlsPrfMac(
+        SymCryptHmacSha1Algorithm,
+        pSha1Key,
+        rbLabelAndSeed,
+        cbLabelAndSeed,
+        NULL,                       // no "seed" part when computing an A(i)
+        0,
+        rbAiSha1 );
+
+    // Produce the output one chunk at a time
+    while( cbRemaining > 0 )
+    {
+        cbChunk = SYMCRYPT_MIN( cbRemaining, SYMCRYPT_TLS_1_1_CHUNK_SIZE );
+
+        // P_MD5: continues from rbAiMd5 and stores the next A(i) back into it
+        SymCryptTlsPrfPHash(
+            SymCryptHmacMd5Algorithm,
+            pMd5Key,
+            rbLabelAndSeed,
+            cbLabelAndSeed,
+            rbAiMd5,
+            SYMCRYPT_HMAC_MD5_RESULT_SIZE,
+            rbChunkMd5,
+            cbChunk,
+            rbAiMd5,
+            SYMCRYPT_HMAC_MD5_RESULT_SIZE );
+
+        // P_SHA1: continues from rbAiSha1 and stores the next A(i) back into it
+        SymCryptTlsPrfPHash(
+            SymCryptHmacSha1Algorithm,
+            pSha1Key,
+            rbLabelAndSeed,
+            cbLabelAndSeed,
+            rbAiSha1,
+            SYMCRYPT_HMAC_SHA1_RESULT_SIZE,
+            rbChunkSha1,
+            cbChunk,
+            rbAiSha1,
+            SYMCRYPT_HMAC_SHA1_RESULT_SIZE );
+
+        // XOR the two streams into the output
+        SymCryptXorBytes( rbChunkMd5, rbChunkSha1, pbOut, cbChunk );
+
+        pbOut += cbChunk;
+        cbRemaining -= cbChunk;
+    }
+
+cleanup:
+    SymCryptWipeKnownSize( rbLabelAndSeed, sizeof(rbLabelAndSeed) );
+    SymCryptWipeKnownSize( rbAiMd5, sizeof(rbAiMd5) );
+    SymCryptWipeKnownSize( rbChunkMd5, sizeof(rbChunkMd5) );
+    SymCryptWipeKnownSize( rbAiSha1, sizeof(rbAiSha1) );
+    SymCryptWipeKnownSize( rbChunkSha1, sizeof(rbChunkSha1) );
+
+    return scError;
+}
+
+//
+// The following PRF is defined in RFC 5246:
+//
+// TLS's PRF is created by applying P_hash to the secret as:
+//
+//      PRF(secret, label, seed) = P_<hash>(secret, label + seed)
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptTlsPrf1_2Derive(
+    _In_                    PCSYMCRYPT_TLSPRF1_2_EXPANDED_KEY   pExpandedKey,
+    _In_reads_opt_(cbLabel) PCBYTE                              pbLabel,
+    _In_                    SIZE_T                              cbLabel,
+    _In_reads_(cbSeed)      PCBYTE                              pbSeed,
+    _In_                    SIZE_T                              cbSeed,
+    _Out_writes_(cbResult)  PBYTE                               pbResult,
+                            SIZE_T                              cbResult)
+{
+    SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR;
+
+    SYMCRYPT_ALIGN BYTE rbLabelAndSeed[SYMCRYPT_TLS_MAX_LABEL_AND_SEED_SIZE];
+
+    // Size checks
+    if( (cbLabel > SYMCRYPT_TLS_MAX_LABEL_SIZE) || (cbSeed > SYMCRYPT_TLS_MAX_SEED_SIZE) )
+    {
+        scError = SYMCRYPT_WRONG_DATA_SIZE;
+        goto cleanup;
+    }
+
+    // rbLabelAndSeed = label + seed (the label is optional)
+    if( cbLabel != 0 )
+    {
+        memcpy( rbLabelAndSeed, pbLabel, cbLabel );
+    }
+    memcpy( rbLabelAndSeed + cbLabel, pbSeed, cbSeed );
+
+    //
+    // Per RFC 2104 (HMAC), a secret longer than the basic compression block
+    // length is hashed first. That is taken care of by the specific HMAC
+    // used inside SymCryptTlsPrfPHash.
+    //
+    SymCryptTlsPrfPHash(
+        pExpandedKey->macAlg,
+        &pExpandedKey->macKey,
+        rbLabelAndSeed,
+        cbLabel + cbSeed,
+        NULL,                       // no previous A(i): start from A(0)
+        0,
+        pbResult,
+        cbResult,
+        NULL,                       // the next A(i) is not needed
+        0 );
+
+cleanup:
+    SymCryptWipeKnownSize( rbLabelAndSeed, sizeof(rbLabelAndSeed) );
+
+    return scError;
+}
+
+//
+// The full TLS 1.0/1.1 Key Derivation Function
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptTlsPrf1_1(
+    _In_reads_(cbKey)       PCBYTE  pbKey,
+    _In_                    SIZE_T  cbKey,
+    _In_reads_opt_(cbLabel) PCBYTE  pbLabel,
+    _In_                    SIZE_T  cbLabel,
+    _In_reads_(cbSeed)      PCBYTE  pbSeed,
+    _In_                    SIZE_T  cbSeed,
+    _Out_writes_(cbResult)  PBYTE   pbResult,
+                            SIZE_T  cbResult)
+{
+    SYMCRYPT_TLSPRF1_1_EXPANDED_KEY key;
+    SYMCRYPT_ERROR scError;
+
+    // Expand the key, and derive the output only if that succeeded
+    scError = SymCryptTlsPrf1_1ExpandKey( &key, pbKey, cbKey );
+    if( scError == SYMCRYPT_NO_ERROR )
+    {
+        scError = SymCryptTlsPrf1_1Derive(
+            &key,
+            pbLabel,
+            cbLabel,
+            pbSeed,
+            cbSeed,
+            pbResult,
+            cbResult );
+    }
+
+    // The expanded key is wiped on every path
+    SymCryptWipeKnownSize( &key, sizeof(key) );
+
+    return scError;
+}
+
+
+//
+// The full TLS 1.2 Key Derivation Function
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptTlsPrf1_2(
+    _In_                    PCSYMCRYPT_MAC  pMacAlgorithm,
+    _In_reads_(cbKey)       PCBYTE          pbKey,
+    _In_
SIZE_T cbKey, + _In_reads_opt_(cbLabel) PCBYTE pbLabel, + _In_ SIZE_T cbLabel, + _In_reads_(cbSeed) PCBYTE pbSeed, + _In_ SIZE_T cbSeed, + _Out_writes_(cbResult) PBYTE pbResult, + SIZE_T cbResult) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SYMCRYPT_TLSPRF1_2_EXPANDED_KEY key; + + // Create the expanded key + scError = SymCryptTlsPrf1_2ExpandKey(&key, pMacAlgorithm, pbKey, cbKey); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Derive the key + scError = SymCryptTlsPrf1_2Derive( + &key, + pbLabel, + cbLabel, + pbSeed, + cbSeed, + pbResult, + cbResult); + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + +cleanup: + SymCryptWipeKnownSize(&key, sizeof(key)); + + return scError; +} diff --git a/libs/symcrypt/lib/xmss.c b/libs/symcrypt/lib/xmss.c new file mode 100644 index 00000000000..1bd9b116c96 --- /dev/null +++ b/libs/symcrypt/lib/xmss.c @@ -0,0 +1,2129 @@ +// +// xmss.c XMSS and XMSS^MT implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +// +// See the symcrypt.h file for documentation on what the various functions do. +// + + +// Maximum size of the domain separator prefix used in PRFs +#define SYMCRYPT_XMSS_MAX_PREFIX_SIZE SYMCRYPT_HASH_MAX_RESULT_SIZE + +// PRF domain separators +#define SYMCRYPT_XMSS_F 0x00 +#define SYMCRYPT_XMSS_H 0x01 +#define SYMCRYPT_XMSS_H_MSG 0x02 +#define SYMCRYPT_XMSS_PRF 0x03 +#define SYMCRYPT_XMSS_PRF_KEYGEN 0x04 + + +static const PCSYMCRYPT_HASH* XmssHashArray[] = { + &SymCryptSha256Algorithm, // 0 + &SymCryptSha512Algorithm, // 1 + &SymCryptShake128HashAlgorithm, // 2 + &SymCryptShake256HashAlgorithm, // 3 +}; + + +typedef enum _SYMCRYPT_XMSS_WOTSP_ALGID +{ + // Hash Fn. 
RFC-8391 SP800-208 + SYMCRYPT_XMSS_WOTSP_SHA2_256 = 0x00000001, // SHA-256 X X + SYMCRYPT_XMSS_WOTSP_SHA2_512 = 0x00000002, // SHA-512 X + SYMCRYPT_XMSS_WOTSP_SHAKE_256 = 0x00000003, // SHAKE128 X + SYMCRYPT_XMSS_WOTSP_SHAKE_512 = 0x00000004, // SHAKE256 X + SYMCRYPT_XMSS_WOTSP_SHA2_192 = 0x00000005, // SHA-256 X + SYMCRYPT_XMSS_WOTSP_SHAKE256_256 = 0x00000006, // SHAKE256 X + SYMCRYPT_XMSS_WOTSP_SHAKE256_192 = 0x00000007, // SHAKE256 X + +} SYMCRYPT_XMSS_WOTSP_ALGID, *PSYMCRYPT_XMSS_WOTSP_ALGID; + + +typedef struct _SYMCRYPT_XMSS_WOTSP_PARAMS +{ + SYMCRYPT_XMSS_WOTSP_ALGID wotspId; + UINT8 hashIndex; + UINT8 n; + UINT8 w; + UINT8 cbPrefix; + +} SYMCRYPT_XMSS_WOTSP_PARAMS, *PSYMCRYPT_XMSS_WOTSP_PARAMS; + + +static const SYMCRYPT_XMSS_WOTSP_PARAMS XmssWotspParams[] = +{ + // wotspId hashIndex n w cbPrefix + { SYMCRYPT_XMSS_WOTSP_SHA2_256, 0, 32, 4, 32 }, // SHA-256 + { SYMCRYPT_XMSS_WOTSP_SHA2_512, 1, 64, 4, 64 }, // SHA-512 + { SYMCRYPT_XMSS_WOTSP_SHAKE_256, 2, 32, 4, 32 }, // SHAKE128 + { SYMCRYPT_XMSS_WOTSP_SHAKE_512, 3, 64, 4, 64 }, // SHAKE256 + { SYMCRYPT_XMSS_WOTSP_SHA2_192, 0, 24, 4, 4 }, // SHA-256 + { SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 3, 32, 4, 32 }, // SHAKE256 + { SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 3, 24, 4, 4 }, // SHAKE256 +}; + +typedef struct _SYMCRYPT_XMSS_PARAMETER_PREDEFINED +{ + UINT32 idAlg; + + SYMCRYPT_XMSS_WOTSP_ALGID idWotsp; + + // total tree height (each level has height h/d) + UINT8 h; + + // number of layers (for single tree, d=1) + UINT8 d; + +} SYMCRYPT_XMSS_PARAMETER_PREDEFINED; + +typedef SYMCRYPT_XMSS_PARAMETER_PREDEFINED* PSYMCRYPT_XMSS_PARAMETER_PREDEFINED; +typedef const SYMCRYPT_XMSS_PARAMETER_PREDEFINED* PCSYMCRYPT_XMSS_PARAMETER_PREDEFINED; + + +static const SYMCRYPT_XMSS_PARAMETER_PREDEFINED XmssParametersPredefined[] = { + + // algId wotspId/wotspIndex h d + { SYMCRYPT_XMSS_SHA2_10_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 10, 1 }, + { SYMCRYPT_XMSS_SHA2_16_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 16, 1 }, + { SYMCRYPT_XMSS_SHA2_20_256, 
SYMCRYPT_XMSS_WOTSP_SHA2_256, 20, 1 }, + { SYMCRYPT_XMSS_SHA2_10_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 10, 1 }, + { SYMCRYPT_XMSS_SHA2_16_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 16, 1 }, + { SYMCRYPT_XMSS_SHA2_20_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 20, 1 }, + { SYMCRYPT_XMSS_SHAKE_10_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 10, 1 }, + { SYMCRYPT_XMSS_SHAKE_16_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 16, 1 }, + { SYMCRYPT_XMSS_SHAKE_20_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 20, 1 }, + { SYMCRYPT_XMSS_SHAKE_10_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 10, 1 }, + { SYMCRYPT_XMSS_SHAKE_16_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 16, 1 }, + { SYMCRYPT_XMSS_SHAKE_20_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 20, 1 }, + { SYMCRYPT_XMSS_SHA2_10_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 10, 1 }, + { SYMCRYPT_XMSS_SHA2_16_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 16, 1 }, + { SYMCRYPT_XMSS_SHA2_20_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 20, 1 }, + { SYMCRYPT_XMSS_SHAKE256_10_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 10, 1 }, + { SYMCRYPT_XMSS_SHAKE256_16_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 16, 1 }, + { SYMCRYPT_XMSS_SHAKE256_20_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 20, 1 }, + { SYMCRYPT_XMSS_SHAKE256_10_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 10, 1 }, + { SYMCRYPT_XMSS_SHAKE256_16_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 16, 1 }, + { SYMCRYPT_XMSS_SHAKE256_20_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 20, 1 }, +}; + + +static const SYMCRYPT_XMSS_PARAMETER_PREDEFINED XmssMtParametersPredefined[] = { + + // algId wotspId/wotspIndex h d + { SYMCRYPT_XMSSMT_SHA2_20_2_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 20, 2 }, + { SYMCRYPT_XMSSMT_SHA2_20_4_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 20, 4 }, + { SYMCRYPT_XMSSMT_SHA2_40_2_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 40, 2 }, + { SYMCRYPT_XMSSMT_SHA2_40_4_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 40, 4 }, + { SYMCRYPT_XMSSMT_SHA2_40_8_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 40, 8 }, + { SYMCRYPT_XMSSMT_SHA2_60_3_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 60, 3 }, + { SYMCRYPT_XMSSMT_SHA2_60_6_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 60, 6 
}, + { SYMCRYPT_XMSSMT_SHA2_60_12_256, SYMCRYPT_XMSS_WOTSP_SHA2_256, 60, 12 }, + + { SYMCRYPT_XMSSMT_SHA2_20_2_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 20, 2 }, + { SYMCRYPT_XMSSMT_SHA2_20_4_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 20, 4 }, + { SYMCRYPT_XMSSMT_SHA2_40_2_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 40, 2 }, + { SYMCRYPT_XMSSMT_SHA2_40_4_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 40, 4 }, + { SYMCRYPT_XMSSMT_SHA2_40_8_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 40, 8 }, + { SYMCRYPT_XMSSMT_SHA2_60_3_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 60, 3 }, + { SYMCRYPT_XMSSMT_SHA2_60_6_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 60, 6 }, + { SYMCRYPT_XMSSMT_SHA2_60_12_512, SYMCRYPT_XMSS_WOTSP_SHA2_512, 60, 12 }, + + { SYMCRYPT_XMSSMT_SHAKE_20_2_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 20, 2 }, + { SYMCRYPT_XMSSMT_SHAKE_20_4_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 20, 4 }, + { SYMCRYPT_XMSSMT_SHAKE_40_2_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 40, 2 }, + { SYMCRYPT_XMSSMT_SHAKE_40_4_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 40, 4 }, + { SYMCRYPT_XMSSMT_SHAKE_40_8_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 40, 8 }, + { SYMCRYPT_XMSSMT_SHAKE_60_3_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 60, 3 }, + { SYMCRYPT_XMSSMT_SHAKE_60_6_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 60, 6 }, + { SYMCRYPT_XMSSMT_SHAKE_60_12_256, SYMCRYPT_XMSS_WOTSP_SHAKE_256, 60, 12 }, + + { SYMCRYPT_XMSSMT_SHAKE_20_2_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 20, 2 }, + { SYMCRYPT_XMSSMT_SHAKE_20_4_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 20, 4 }, + { SYMCRYPT_XMSSMT_SHAKE_40_2_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 40, 2 }, + { SYMCRYPT_XMSSMT_SHAKE_40_4_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 40, 4 }, + { SYMCRYPT_XMSSMT_SHAKE_40_8_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 40, 8 }, + { SYMCRYPT_XMSSMT_SHAKE_60_3_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 60, 3 }, + { SYMCRYPT_XMSSMT_SHAKE_60_6_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 60, 6 }, + { SYMCRYPT_XMSSMT_SHAKE_60_12_512, SYMCRYPT_XMSS_WOTSP_SHAKE_512, 60, 12 }, + + { SYMCRYPT_XMSSMT_SHA2_20_2_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 20, 2 }, + { 
SYMCRYPT_XMSSMT_SHA2_20_4_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 20, 4 }, + { SYMCRYPT_XMSSMT_SHA2_40_2_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 40, 2 }, + { SYMCRYPT_XMSSMT_SHA2_40_4_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 40, 4 }, + { SYMCRYPT_XMSSMT_SHA2_40_8_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 40, 8 }, + { SYMCRYPT_XMSSMT_SHA2_60_3_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 60, 3 }, + { SYMCRYPT_XMSSMT_SHA2_60_6_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 60, 6 }, + { SYMCRYPT_XMSSMT_SHA2_60_12_192, SYMCRYPT_XMSS_WOTSP_SHA2_192, 60, 12 }, + + { SYMCRYPT_XMSSMT_SHAKE256_20_2_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 20, 2 }, + { SYMCRYPT_XMSSMT_SHAKE256_20_4_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 20, 4 }, + { SYMCRYPT_XMSSMT_SHAKE256_40_2_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 40, 2 }, + { SYMCRYPT_XMSSMT_SHAKE256_40_4_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 40, 4 }, + { SYMCRYPT_XMSSMT_SHAKE256_40_8_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 40, 8 }, + { SYMCRYPT_XMSSMT_SHAKE256_60_3_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 60, 3 }, + { SYMCRYPT_XMSSMT_SHAKE256_60_6_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 60, 6 }, + { SYMCRYPT_XMSSMT_SHAKE256_60_12_256, SYMCRYPT_XMSS_WOTSP_SHAKE256_256, 60, 12 }, + + { SYMCRYPT_XMSSMT_SHAKE256_20_2_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 20, 2 }, + { SYMCRYPT_XMSSMT_SHAKE256_20_4_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 20, 4 }, + { SYMCRYPT_XMSSMT_SHAKE256_40_2_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 40, 2 }, + { SYMCRYPT_XMSSMT_SHAKE256_40_4_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 40, 4 }, + { SYMCRYPT_XMSSMT_SHAKE256_40_8_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 40, 8 }, + { SYMCRYPT_XMSSMT_SHAKE256_60_3_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 60, 3 }, + { SYMCRYPT_XMSSMT_SHAKE256_60_6_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 60, 6 }, + { SYMCRYPT_XMSSMT_SHAKE256_60_12_192, SYMCRYPT_XMSS_WOTSP_SHAKE256_192, 60, 12 }, +}; + +// +// Compute the number of chains for an n-byte input and its checksum +// for Winternitz parameter w (i.e., using w-bit blocks) in an OTS scheme +// 
+VOID
+SYMCRYPT_CALL
+SymCryptHbsGetWinternitzLengths(
+            UINT32  n,
+            UINT32  w,
+    _Out_   PUINT32 puLen1,
+    _Out_   PUINT32 puLen2
+    )
+{
+    SYMCRYPT_ASSERT(n > 0);
+    SYMCRYPT_ASSERT(w >= 1 && w <= 8);
+
+    // Number of w-bit digits needed for an n-byte input (rounded up)
+    const UINT32 nDigits = (8 * n + (w - 1)) / w;
+
+    // Largest possible checksum: every digit at its maximum value 2^w - 1
+    const UINT32 maxChecksum = nDigits * ((1 << w) - 1);
+
+    // Bits needed to store maxChecksum, i.e. index of its top set bit plus one
+    const UINT32 nChecksumBits = 32 - SymCryptCountLeadingZeros32(maxChecksum);
+
+    *puLen1 = nDigits;
+
+    // Number of w-bit digits needed to hold the checksum (rounded up)
+    *puLen2 = (nChecksumBits + (w - 1)) / w;
+}
+
+//
+// Look up the WOTS+ parameter set with the given id in XmssWotspParams and
+// copy its hash algorithm, output size, Winternitz width and prefix length
+// into pParams. Returns SYMCRYPT_INVALID_ARGUMENT (leaving pParams untouched)
+// when the id is not in the table.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptXmssGetWotspParams(
+            SYMCRYPT_XMSS_WOTSP_ALGID   id,
+    _Out_   PSYMCRYPT_XMSS_PARAMS       pParams )
+{
+    for (UINT32 i = 0; i < SYMCRYPT_ARRAY_SIZE(XmssWotspParams); i++)
+    {
+        const SYMCRYPT_XMSS_WOTSP_PARAMS* pEntry = &XmssWotspParams[i];
+
+        if (pEntry->wotspId != id)
+        {
+            continue;
+        }
+
+        SYMCRYPT_ASSERT(pEntry->hashIndex < SYMCRYPT_ARRAY_SIZE(XmssHashArray));
+
+        pParams->hash = *XmssHashArray[pEntry->hashIndex];
+        pParams->cbHashOutput = pEntry->n;
+        pParams->nWinternitzWidth = pEntry->w;
+        pParams->cbPrefix = pEntry->cbPrefix;
+
+        return SYMCRYPT_NO_ERROR;
+    }
+
+    return SYMCRYPT_INVALID_ARGUMENT;
+}
+
+//
+// Derive XMSS parameters that can be computed from others
+//
+// SYMCRYPT_XMSS_PARAMS structure must be initialized with either predefined
+// or user defined parameters before this function is called.
+//
+VOID
+SYMCRYPT_CALL
+SymCryptXmssDeriveParams(
+    _Inout_ PSYMCRYPT_XMSS_PARAMS pParams )
+{
+    UINT32 nChecksumBits;
+
+    // len1 digits for the message hash, len2 digits for its checksum
+    SymCryptHbsGetWinternitzLengths(
+        pParams->cbHashOutput,
+        pParams->nWinternitzWidth,
+        &pParams->len1,
+        &pParams->len2);
+
+    pParams->len = pParams->len1 + pParams->len2;
+
+    // Shift count derived from the checksum width, which must fit in 32 bits
+    nChecksumBits = pParams->len2 * pParams->nWinternitzWidth;
+    SYMCRYPT_ASSERT(nChecksumBits <= 32);
+    pParams->nLeftShift32 = (UINT8)(32 - nChecksumBits);
+
+    // Every layer has the same height
+    pParams->nLayerHeight = pParams->nTotalTreeHeight / pParams->nLayers;
+
+    if (pParams->nLayers != 1)
+    {
+        // multi-tree: number of bytes to store h-bits for Idx
+        pParams->cbIdx = (pParams->nTotalTreeHeight + 7) / 8;
+    }
+    else
+    {
+        // single trees have a 32-bit Idx value
+        pParams->cbIdx = 4;
+    }
+}
+
+
+//
+// Fill a SYMCRYPT_XMSS_PARAMS structure from either an XMSS algorithm ID or
+// XMSS^MT algorithm ID from predefined parameter sets.
+//
+// pParams is wiped first. SYMCRYPT_INVALID_ARGUMENT is returned when the id
+// is not found in the selected table.
+//
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptXmssParamsFromAlgIdCommon(
+            UINT32                  id,
+            BOOLEAN                 isMultiTree,
+    _Out_   PSYMCRYPT_XMSS_PARAMS   pParams )
+{
+    // Select the XMSS^MT or the single-tree table
+    PCSYMCRYPT_XMSS_PARAMETER_PREDEFINED pTable =
+        isMultiTree ? XmssMtParametersPredefined : XmssParametersPredefined;
+    SIZE_T nEntries =
+        isMultiTree ? SYMCRYPT_ARRAY_SIZE(XmssMtParametersPredefined)
+                    : SYMCRYPT_ARRAY_SIZE(XmssParametersPredefined);
+    SYMCRYPT_ERROR scError = SYMCRYPT_INVALID_ARGUMENT;
+
+    SymCryptWipeKnownSize(pParams, sizeof(*pParams));
+
+    for (UINT32 i = 0; i < nEntries; i++)
+    {
+        if (pTable[i].idAlg != id)
+        {
+            continue;
+        }
+
+        // Found the parameter set: start from its WOTS+ parameters
+        scError = SymCryptXmssGetWotspParams(pTable[i].idWotsp, pParams);
+
+        if (scError == SYMCRYPT_NO_ERROR)
+        {
+            SYMCRYPT_ASSERT(pParams->cbHashOutput <= SYMCRYPT_HASH_MAX_RESULT_SIZE);
+
+            pParams->id = id;
+            pParams->nTotalTreeHeight = pTable[i].h;
+            pParams->nLayers = pTable[i].d;
+            SymCryptXmssDeriveParams(pParams);
+        }
+
+        break;
+    }
+
+    return scError;
+}
+
+
+SYMCRYPT_ERROR
+SYMCRYPT_CALL
+SymCryptXmssParamsFromAlgId( + SYMCRYPT_XMSS_ALGID id, + _Out_ PSYMCRYPT_XMSS_PARAMS pParams ) +{ + return SymCryptXmssParamsFromAlgIdCommon(id, FALSE, pParams); +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssMtParamsFromAlgId( + SYMCRYPT_XMSSMT_ALGID id, + _Out_ PSYMCRYPT_XMSS_PARAMS pParams) +{ + return SymCryptXmssParamsFromAlgIdCommon(id, TRUE, pParams); +} + + +// +// Set custom XMSS/XMSS^MT parameters +// +// This function can be used to initialize SYMCRYPT_XMSS_PARAMS with +// custom parameters that are not defined by the standards. +// +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssSetParams( + _Out_ PSYMCRYPT_XMSS_PARAMS pParams, + UINT32 id, + _In_ PCSYMCRYPT_HASH pHash, // hash algorithm + UINT32 cbHashOutput, // hash output size + UINT32 nWinternitzWidth, // Winternitz parameter + UINT32 nTotalTreeHeight, // total tree height + UINT32 nLayers, // number of layers + UINT32 cbPrefix // domain separator prefix length + ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + if (pParams == NULL) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptWipeKnownSize(pParams, sizeof(*pParams)); + + if (pHash == NULL) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Output size n can at most be equal to the hash output size + if (cbHashOutput == 0 || + cbHashOutput > pHash->resultSize || + cbHashOutput > SYMCRYPT_HASH_MAX_RESULT_SIZE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Winternitz parameter must be one of 1, 2, 4, or 8 + if (nWinternitzWidth == 0 || + nWinternitzWidth > 8 || + (nWinternitzWidth & (nWinternitzWidth - 1)) != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // nTotalTreeHeight and nLayers must both be positive and + // nLayers must divide nTotalTreeHeight + if (nTotalTreeHeight == 0 || + nLayers == 0 || + (nTotalTreeHeight % nLayers) != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Layer height (tree height of one layer) can 
be at most 31 + if ((nTotalTreeHeight / nLayers) > 31) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Total tree height can be at most 63 + if (nTotalTreeHeight > 63) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + if (cbPrefix == 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + pParams->id = id; + pParams->hash = pHash; + pParams->cbHashOutput = cbHashOutput; + pParams->nWinternitzWidth = nWinternitzWidth; + pParams->nTotalTreeHeight = nTotalTreeHeight; + pParams->nLayers = nLayers; + SymCryptXmssDeriveParams(pParams); + + pParams->cbPrefix = cbPrefix; + +cleanup: + + return scError; +} + + +// +// Updates the type field in ADRS structure and clears the +// subsequent fields. +// +// Does not modify the first two fields (Layer and Tree) of +// the ADRS structure. +// +VOID +SYMCRYPT_CALL +SymCryptXmssSetAdrsType( + _Out_ PXMSS_ADRS adrs, + UINT32 type ) +{ + SYMCRYPT_STORE_MSBFIRST32(adrs->en32Type, type); + SymCryptWipeKnownSize(&adrs->u, sizeof(adrs->u)); + SYMCRYPT_STORE_MSBFIRST32(adrs->en32KeyAndMask, 0); +} + + +SIZE_T +SYMCRYPT_CALL +SymCryptXmssSizeofSignatureFromParams( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams ) +{ + SYMCRYPT_ASSERT(pParams->nLayers != 0); + SYMCRYPT_ASSERT((pParams->nTotalTreeHeight % pParams->nLayers) == 0); + SYMCRYPT_ASSERT(pParams->nLayerHeight > 0); + + SIZE_T size = 0; + size += pParams->cbIdx; // idx + size += pParams->cbHashOutput; // randomness + + // WOTSP signature + authentication path for each layer + size += pParams->nLayers * ( pParams->cbHashOutput * (pParams->len + pParams->nLayerHeight) ); + + return size; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssSizeofKeyBlobFromParams( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + SYMCRYPT_XMSSKEY_TYPE keyType, + _Out_ SIZE_T* pcbKey ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SIZE_T cbPublicKey = 0; + SIZE_T cbPrivateKey = 0; + + // Public Key + cbPublicKey += sizeof(UINT32); // Alg ID + cbPublicKey += 2 * 
pParams->cbHashOutput; // Root and Seed + + // Private Key (on top of the public key) + cbPrivateKey = cbPublicKey; + cbPrivateKey += sizeof(UINT64); // Idx + cbPrivateKey += 2 * pParams->cbHashOutput; // SK_XMSS and SK_PRF + + switch (keyType) + { + case SYMCRYPT_XMSSKEY_TYPE_PUBLIC: + *pcbKey = cbPublicKey; + break; + + case SYMCRYPT_XMSSKEY_TYPE_PRIVATE: + *pcbKey = cbPrivateKey; + break; + + default: + scError = SYMCRYPT_INVALID_ARGUMENT; + break; + } + + return scError; +} + +PSYMCRYPT_XMSS_KEY +SYMCRYPT_CALL +SymCryptXmsskeyAllocate( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + UINT32 flags ) +{ + PSYMCRYPT_XMSS_KEY pKey = NULL; + + // No flags allowed + if (flags != 0) + { + goto cleanup; + } + + SIZE_T cbSize = sizeof(SYMCRYPT_XMSS_KEY); + + pKey = SymCryptCallbackAlloc(cbSize); + + if (pKey == NULL) + { + goto cleanup; + } + + SymCryptWipe(pKey, cbSize); + pKey->version = 1; + pKey->keyType = SYMCRYPT_XMSSKEY_TYPE_NONE; + pKey->params = *pParams; + + SYMCRYPT_SET_MAGIC(pKey); + + cleanup: + + return pKey; +} + + +VOID +SYMCRYPT_CALL +SymCryptXmsskeyFree( + _Inout_ PSYMCRYPT_XMSS_KEY pKey ) +{ + SYMCRYPT_CHECK_MAGIC(pKey); + SymCryptWipeKnownSize(pKey, sizeof(*pKey)); + SymCryptCallbackFree(pKey); +} + + +PSYMCRYPT_INCREMENTAL_TREEHASH +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashInit( + UINT32 nLeaves, + PBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 cbHashResult, + PSYMCRYPT_INCREMENTAL_TREEHASH_FUNC funcCompressNodes, + PSYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT pContext ) +{ + UNREFERENCED_PARAMETER(cbBuffer); + + SYMCRYPT_ASSERT(cbBuffer >= SymCryptHbsSizeofScratchBytesForIncrementalTreehash(cbHashResult, nLeaves)); + + PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash = (PSYMCRYPT_INCREMENTAL_TREEHASH)pbBuffer; + + pIncHash->cbNode = 2 * sizeof(UINT32) + cbHashResult; + pIncHash->nSize = 0; + pIncHash->nCapacity = SymCryptHbsIncrementalTreehashStackDepth(nLeaves); + pIncHash->nLastLeafIndex = 0; + pIncHash->funcCompressNodes = funcCompressNodes; + pIncHash->pContext 
= pContext; + + return pIncHash; +} + + +PSYMCRYPT_TREEHASH_NODE +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashGetNode( + _In_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash, + SIZE_T index ) +{ + PBYTE pNode = (PBYTE)pIncHash->arrNodes; + + pNode += index * pIncHash->cbNode; + + return (PSYMCRYPT_TREEHASH_NODE)pNode; +} + + +PSYMCRYPT_TREEHASH_NODE +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashAllocNode( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash, + UINT32 nLeafIndex ) +{ + SYMCRYPT_ASSERT(pIncHash->nSize < pIncHash->nCapacity); + + PSYMCRYPT_TREEHASH_NODE pNode = SymCryptHbsIncrementalTreehashGetNode(pIncHash, pIncHash->nSize); + + pNode->height = 0; + pNode->index = nLeafIndex; + + pIncHash->nSize++; + + return pNode; +} + + +VOID +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashGetTopNodes( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash, + _Out_ PSYMCRYPT_TREEHASH_NODE *ppNodeLeft, + _Out_ PSYMCRYPT_TREEHASH_NODE *ppNodeRight ) +{ + *ppNodeRight = (pIncHash->nSize < 1) ? NULL : SymCryptHbsIncrementalTreehashGetNode(pIncHash, pIncHash->nSize - 1); + + *ppNodeLeft = (pIncHash->nSize < 2) ? 
NULL : SymCryptHbsIncrementalTreehashGetNode(pIncHash, pIncHash->nSize - 2); +} + + +PSYMCRYPT_TREEHASH_NODE +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashProcessCommon( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash, + BOOLEAN fFinal ) +{ + PSYMCRYPT_TREEHASH_NODE pNodeLeft = NULL; + PSYMCRYPT_TREEHASH_NODE pNodeRight = NULL; + + SYMCRYPT_ASSERT(pIncHash->nSize > 0); + + SymCryptHbsIncrementalTreehashGetTopNodes(pIncHash, &pNodeLeft, &pNodeRight); + + while ( pNodeLeft && + (fFinal || (pNodeLeft->height == pNodeRight->height)) ) + { + pIncHash->funcCompressNodes( + pNodeLeft, + pNodeRight, + pNodeLeft, + pIncHash->pContext); + + pIncHash->nSize--; + + SymCryptHbsIncrementalTreehashGetTopNodes(pIncHash, &pNodeLeft, &pNodeRight); + } + + return pNodeRight; +} + + +PSYMCRYPT_TREEHASH_NODE +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashProcess( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash) +{ + return SymCryptHbsIncrementalTreehashProcessCommon(pIncHash, FALSE); +} + + +PSYMCRYPT_TREEHASH_NODE +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashFinalize( + _Inout_ PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash ) +{ + return SymCryptHbsIncrementalTreehashProcessCommon(pIncHash, TRUE); +} + + +UINT32 +SYMCRYPT_CALL +SymCryptHbsIncrementalTreehashStackDepth( + UINT32 nLeaves) +{ + UINT32 h; + + // Minimum height binary tree that contains nLeaves many leaves is h+1 + h = 31 - SymCryptCountLeadingZeros32(nLeaves); + + // Tree root computation will require a stack of depth equal to tree height plus 1 + return (h + 2); +} + + +SIZE_T +SYMCRYPT_CALL +SymCryptHbsSizeofScratchBytesForIncrementalTreehash( + UINT32 cbNode, + UINT32 nLeaves) +{ + SIZE_T nodeSize = cbNode + 2 * sizeof(UINT32); + SIZE_T result = (sizeof(SYMCRYPT_INCREMENTAL_TREEHASH) - sizeof(SYMCRYPT_TREEHASH_NODE)); + + result += nodeSize * SymCryptHbsIncrementalTreehashStackDepth(nLeaves); + return result; +} + + +VOID +SYMCRYPT_CALL +SymCryptXmssPrfInit( + _In_ PCSYMCRYPT_HASH hash, + BYTE PrfType, + SIZE_T 
prefixLength, + _Out_ PSYMCRYPT_HASH_STATE state ) +{ + BYTE prefix[SYMCRYPT_XMSS_MAX_PREFIX_SIZE]; + + SYMCRYPT_ASSERT(prefixLength <= SYMCRYPT_XMSS_MAX_PREFIX_SIZE); + + SymCryptWipe(prefix, prefixLength); + prefix[prefixLength - 1] = PrfType; + + SymCryptHashInit(hash, state); + SymCryptHashAppend(hash, state, prefix, prefixLength); +} + + +VOID +SYMCRYPT_CALL +SymCryptXmssPrfKey( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _In_reads_bytes_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, + _Out_ SYMCRYPT_HASH_STATE *pState ) +{ + SymCryptXmssPrfInit(pParams->hash, SYMCRYPT_XMSS_PRF, pParams->cbPrefix, pState); + SymCryptHashAppend(pParams->hash, pState, pbKey, cbKey); +} + +VOID +SYMCRYPT_CALL +SymCryptXmssPrf( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + BYTE PrfType, + _In_reads_bytes_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, + _In_reads_bytes_( cbMsg ) PCBYTE pbMsg, + SIZE_T cbMsg, + _Out_writes_bytes_( pParams->cbHashOutput ) PBYTE pbOutput ) +{ + SYMCRYPT_HASH_STATE state; + + SymCryptXmssPrfInit(pParams->hash, PrfType, pParams->cbPrefix, &state); + SymCryptHashAppend(pParams->hash, &state, pbKey, cbKey); + SymCryptHashAppend(pParams->hash, &state, pbMsg, cbMsg); + SymCryptHashResult(pParams->hash, &state, pbOutput, pParams->cbHashOutput); +} + + +VOID +SYMCRYPT_CALL +SymCryptXmssRandHash( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _Inout_ XMSS_ADRS *adrs, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSeed, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbLeft, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbRight, + _Out_writes_bytes_( pParams->cbHashOutput ) PBYTE pbOutput ) +{ + BYTE key[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + BYTE bitmask[2 * SYMCRYPT_HASH_MAX_RESULT_SIZE]; + SYMCRYPT_HASH_STATE stateKeyed; + SYMCRYPT_HASH_STATE stateMask; + + SYMCRYPT_ASSERT(pParams->cbHashOutput <= SYMCRYPT_HASH_MAX_RESULT_SIZE); + + SymCryptXmssPrfKey(pParams, pbSeed, pParams->cbHashOutput, &stateKeyed); + + SYMCRYPT_STORE_MSBFIRST32(adrs->en32KeyAndMask, 1); + 
SymCryptHashStateCopy(pParams->hash, &stateKeyed, &stateMask); + SymCryptHashAppend(pParams->hash, &stateMask, (PCBYTE)adrs, sizeof(*adrs)); + SymCryptHashResult(pParams->hash, &stateMask, &bitmask[0], pParams->cbHashOutput); + + SYMCRYPT_STORE_MSBFIRST32(adrs->en32KeyAndMask, 2); + SymCryptHashStateCopy(pParams->hash, &stateKeyed, &stateMask); + SymCryptHashAppend(pParams->hash, &stateMask, (PCBYTE)adrs, sizeof(*adrs)); + SymCryptHashResult(pParams->hash, &stateMask, &bitmask[pParams->cbHashOutput], pParams->cbHashOutput); + + SYMCRYPT_STORE_MSBFIRST32(adrs->en32KeyAndMask, 0); + SymCryptHashAppend(pParams->hash, &stateKeyed, (PCBYTE)adrs, sizeof(*adrs)); + SymCryptHashResult(pParams->hash, &stateKeyed, key, pParams->cbHashOutput); + + SymCryptXorBytes(&bitmask[0], pbLeft, &bitmask[0], pParams->cbHashOutput); + SymCryptXorBytes(&bitmask[pParams->cbHashOutput], pbRight, &bitmask[pParams->cbHashOutput], pParams->cbHashOutput); + + SymCryptXmssPrf(pParams, SYMCRYPT_XMSS_H, key, pParams->cbHashOutput, bitmask, 2 * pParams->cbHashOutput, pbOutput); +} + + +VOID +SYMCRYPT_CALL +SymCryptXmssTreeNodeCompress( + _In_ PSYMCRYPT_TREEHASH_NODE pNodeLeft, + _In_ PSYMCRYPT_TREEHASH_NODE pNodeRight, + _Out_ PSYMCRYPT_TREEHASH_NODE pNodeOut, + _Inout_ PSYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT pCtxIncHash ) +{ + SYMCRYPT_STORE_MSBFIRST32(pCtxIncHash->adrs.u.hashtree.en32Height, pNodeLeft->height); + SYMCRYPT_STORE_MSBFIRST32(pCtxIncHash->adrs.u.hashtree.en32Index, pNodeLeft->index / 2); + + SymCryptXmssRandHash( + pCtxIncHash->pParams, + &pCtxIncHash->adrs, + pCtxIncHash->pbSeed, + pNodeLeft->value, + pNodeRight->value, + pNodeOut->value); + + pNodeOut->index = pNodeLeft->index / 2; + pNodeOut->height = pNodeLeft->height + 1; +} + +VOID +SYMCRYPT_CALL +SymCryptXmssLtreeNodeCompress( + _In_ PSYMCRYPT_TREEHASH_NODE pNodeLeft, + _In_ PSYMCRYPT_TREEHASH_NODE pNodeRight, + _Out_ PSYMCRYPT_TREEHASH_NODE pNodeOut, + _Inout_ PSYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT pCtxIncHash ) +{ + 
SYMCRYPT_STORE_MSBFIRST32(pCtxIncHash->adrs.u.ltree.en32Height, pNodeLeft->height); + SYMCRYPT_STORE_MSBFIRST32(pCtxIncHash->adrs.u.ltree.en32Index, pNodeLeft->index / 2); + + SymCryptXmssRandHash( + pCtxIncHash->pParams, + &pCtxIncHash->adrs, + pCtxIncHash->pbSeed, + pNodeLeft->value, + pNodeRight->value, + pNodeOut->value); + + pNodeOut->index = pNodeLeft->index / 2; + pNodeOut->height = pNodeLeft->height + 1; +} + +VOID +SYMCRYPT_CALL +SymCryptXmssCreateWotspSecret( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSkXmss, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSeed, + _Inout_ XMSS_ADRS *adrs, + _Out_writes_bytes_( pParams->cbHashOutput ) PBYTE pbOutput ) +{ + SYMCRYPT_HASH_STATE state; + + SymCryptXmssPrfInit(pParams->hash, SYMCRYPT_XMSS_PRF_KEYGEN, pParams->cbPrefix, &state); + SymCryptHashAppend(pParams->hash, &state, pbSkXmss, pParams->cbHashOutput); + SymCryptHashAppend(pParams->hash, &state, pbSeed, pParams->cbHashOutput); + SymCryptHashAppend(pParams->hash, &state, (PCBYTE)adrs, sizeof(*adrs)); + SymCryptHashResult(pParams->hash, &state, pbOutput, pParams->cbHashOutput); +} + +VOID +SYMCRYPT_CALL +SymCryptXmssChain( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbInput, + UINT32 startIndex, + UINT32 steps, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSeed, + _Inout_ XMSS_ADRS *adrs, + _Out_writes_bytes_( pParams->cbHashOutput ) PBYTE pbOutput ) +{ + BYTE tmp[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + BYTE key[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + BYTE bm[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + SYMCRYPT_HASH_STATE stateKey; + SYMCRYPT_HASH_STATE stateMask; + + memcpy(tmp, pbInput, pParams->cbHashOutput); + + for (UINT32 i = startIndex; i < startIndex + steps; i++) + { + SYMCRYPT_STORE_MSBFIRST32(adrs->u.ots.en32Hash, i); + + SymCryptXmssPrfKey(pParams, pbSeed, pParams->cbHashOutput, &stateKey); + SymCryptHashStateCopy(pParams->hash, &stateKey, &stateMask); + + 
SYMCRYPT_STORE_MSBFIRST32(adrs->en32KeyAndMask, 0); + SymCryptHashAppend(pParams->hash, &stateKey, (PCBYTE)adrs, sizeof(*adrs)); + SymCryptHashResult(pParams->hash, &stateKey, key, pParams->cbHashOutput); + + SYMCRYPT_STORE_MSBFIRST32(adrs->en32KeyAndMask, 1); + SymCryptHashAppend(pParams->hash, &stateMask, (PCBYTE)adrs, sizeof(*adrs)); + SymCryptHashResult(pParams->hash, &stateMask, bm, pParams->cbHashOutput); + + SymCryptXorBytes(tmp, bm, tmp, pParams->cbHashOutput); + + SymCryptXmssPrf(pParams, SYMCRYPT_XMSS_F, key, pParams->cbHashOutput, tmp, pParams->cbHashOutput, tmp); + } + + // reset used ADRS fields + SYMCRYPT_STORE_MSBFIRST32(adrs->u.ots.en32Hash, 0); + SYMCRYPT_STORE_MSBFIRST32(adrs->en32KeyAndMask, 0); + + memcpy(pbOutput, tmp, pParams->cbHashOutput); +} + + +VOID +SYMCRYPT_CALL +SymCryptXmssCreateWotspPublickey( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _Inout_ XMSS_ADRS *adrs, + UINT32 uLeaf, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSkXmss, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSeed, + _Out_writes_bytes_opt_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch, + _Out_writes_bytes_( pParams->cbHashOutput ) PBYTE pbOutput ) +{ + PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash = NULL; + PSYMCRYPT_TREEHASH_NODE pNode = NULL; + SYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT ctxIncHash; + + SYMCRYPT_ASSERT(cbScratch >= SymCryptHbsSizeofScratchBytesForIncrementalTreehash(pParams->cbHashOutput, pParams->len)); + + SymCryptXmssSetAdrsType(adrs, XMSS_ADRS_TYPE_LTREE); + SYMCRYPT_STORE_MSBFIRST32(adrs->u.ltree.en32Leaf, uLeaf); + + ctxIncHash.adrs = *adrs; + ctxIncHash.pParams = pParams; + ctxIncHash.pbSeed = pbSeed; + + pIncHash = SymCryptHbsIncrementalTreehashInit( + pParams->len, + pbScratch, + cbScratch, + pParams->cbHashOutput, + SymCryptXmssLtreeNodeCompress, + &ctxIncHash); + + for (UINT32 i = 0; i < pParams->len; i++) + { + pNode = SymCryptHbsIncrementalTreehashAllocNode(pIncHash, i); + + SymCryptXmssSetAdrsType(adrs, XMSS_ADRS_TYPE_OTS); + 
SYMCRYPT_STORE_MSBFIRST32(adrs->u.ots.en32Leaf, uLeaf); + SYMCRYPT_STORE_MSBFIRST32(adrs->u.ots.en32Chain, i); + + SymCryptXmssCreateWotspSecret( + pParams, + pbSkXmss, + pbSeed, + adrs, + pNode->value); + + SymCryptXmssChain( + pParams, + pNode->value, + 0, + (1 << pParams->nWinternitzWidth) - 1, + pbSeed, + adrs, + pNode->value); + + SymCryptHbsIncrementalTreehashProcess(pIncHash); + + } + + pNode = SymCryptHbsIncrementalTreehashFinalize(pIncHash); + + memcpy(pbOutput, pNode->value, pParams->cbHashOutput); +} + + +VOID +SYMCRYPT_CALL +SymCryptXmssComputeSubtreeRoot( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _In_ XMSS_ADRS *adrs, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSkXmss, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSeed, + UINT32 uLeaf, + UINT32 uHeight, + _Out_writes_bytes_opt_( cbScratch ) PBYTE pbScratch, + SIZE_T cbScratch, + _Out_writes_bytes_( pParams->cbHashOutput ) PBYTE pbRoot ) +{ + UNREFERENCED_PARAMETER(cbScratch); + + PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash = NULL; + PSYMCRYPT_TREEHASH_NODE pNode = NULL; + SYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT ctxIncHash; + + SYMCRYPT_ASSERT((uLeaf & ((1UL << uHeight) - 1)) == 0); // uLeaf must be a multiple of 2^uHeight + SYMCRYPT_ASSERT(pParams->nLayerHeight < 32); // Ensure nLeaves fits in 32 bits + + SIZE_T cbScratchTree = SymCryptHbsSizeofScratchBytesForIncrementalTreehash(pParams->cbHashOutput, 1ULL << pParams->nLayerHeight); + SIZE_T cbScratchLtree = SymCryptHbsSizeofScratchBytesForIncrementalTreehash(pParams->cbHashOutput, pParams->len); + + SYMCRYPT_ASSERT(cbScratch >= (cbScratchTree + cbScratchLtree)); + + PBYTE pbScratchTree = pbScratch; + PBYTE pbScratchLtree = pbScratch + cbScratchTree; + + SymCryptXmssSetAdrsType(adrs, XMSS_ADRS_TYPE_HASH_TREE); + + ctxIncHash.adrs = *adrs; + ctxIncHash.pParams = pParams; + ctxIncHash.pbSeed = pbSeed; + + pIncHash = SymCryptHbsIncrementalTreehashInit( + 1ULL << uHeight, + pbScratchTree, + cbScratchTree, + pParams->cbHashOutput, + 
SymCryptXmssTreeNodeCompress, + &ctxIncHash); + + for (UINT32 nLeafIndex = uLeaf; nLeafIndex < uLeaf + (1UL << uHeight); nLeafIndex++) + { + pNode = SymCryptHbsIncrementalTreehashAllocNode(pIncHash, nLeafIndex); + + SymCryptXmssCreateWotspPublickey(pParams, + adrs, + nLeafIndex, + pbSkXmss, + pbSeed, + pbScratchLtree, + cbScratchLtree, + pNode->value ); + + SymCryptHbsIncrementalTreehashProcess(pIncHash); + } + + pNode = SymCryptHbsIncrementalTreehashFinalize(pIncHash); + + memcpy(pbRoot, pNode->value, pParams->cbHashOutput); +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssComputePublicRoot( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _In_reads_bytes_( cbSeed ) PCBYTE pbSeed, + SIZE_T cbSeed, + _In_reads_bytes_( cbSkXmss ) PCBYTE pbSkXmss, + SIZE_T cbSkXmss, + _Out_writes_bytes_( cbRoot ) PBYTE pbRoot, + SIZE_T cbRoot ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + XMSS_ADRS adrs; + + SYMCRYPT_ASSERT(pParams->nLayerHeight < 32); // Ensure nLeaves fits in 32 bits + + if (pbRoot == NULL || cbRoot != pParams->cbHashOutput || + pbSeed == NULL || cbSeed != pParams->cbHashOutput || + pbSkXmss == NULL || cbSkXmss != pParams->cbHashOutput) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbScratch += SymCryptHbsSizeofScratchBytesForIncrementalTreehash(pParams->cbHashOutput, 1ULL << pParams->nLayerHeight); + cbScratch += SymCryptHbsSizeofScratchBytesForIncrementalTreehash(pParams->cbHashOutput, pParams->len); + + SYMCRYPT_ASSERT(cbScratch > 0); + pbScratch = SymCryptCallbackAlloc(cbScratch); + + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + SymCryptWipeKnownSize(&adrs, sizeof(XMSS_ADRS)); + SYMCRYPT_STORE_MSBFIRST32(adrs.en32Layer, pParams->nLayers - 1); + + SymCryptXmssComputeSubtreeRoot( + pParams, + &adrs, + pbSkXmss, + pbSeed, + 0, + pParams->nLayerHeight, + pbScratch, + cbScratch, + pbRoot ); + +cleanup: + + if (pbScratch != NULL) + { + 
SymCryptWipe(pbScratch, cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmsskeyVerifyRoot( + _In_ PCSYMCRYPT_XMSS_KEY pKey) +{ + BYTE Root[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_CHECK_MAGIC(pKey); + + // key to be verified has to be a private key + if (pKey->keyType != SYMCRYPT_XMSSKEY_TYPE_PRIVATE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptWipeKnownSize(Root, sizeof(Root)); + + scError = SymCryptXmssComputePublicRoot( + &pKey->params, + pKey->Seed, + pKey->params.cbHashOutput, + pKey->SkXmss, + pKey->params.cbHashOutput, + Root, + pKey->params.cbHashOutput); + + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + if (!SymCryptEqual(Root, pKey->Root, pKey->params.cbHashOutput)) + { + scError = SYMCRYPT_HBS_PUBLIC_ROOT_MISMATCH; + } + +cleanup: + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmsskeyGenerate( + _Inout_ PSYMCRYPT_XMSS_KEY pKey, + UINT32 flags) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + + SYMCRYPT_CHECK_MAGIC(pKey); + + if (flags != 0) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Wipe key material + SymCryptWipeKnownSize(pKey->Root, sizeof(pKey->Root)); + SymCryptWipeKnownSize(pKey->Seed, sizeof(pKey->Seed)); + SymCryptWipeKnownSize(pKey->SkPrf, sizeof(pKey->SkPrf)); + SymCryptWipeKnownSize(pKey->SkXmss, sizeof(pKey->SkXmss)); + pKey->Idx = 0; + + scError = SymCryptCallbackRandom(pKey->SkPrf, pKey->params.cbHashOutput); + + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptCallbackRandom(pKey->SkXmss, pKey->params.cbHashOutput); + + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + scError = SymCryptCallbackRandom(pKey->Seed, pKey->params.cbHashOutput); + + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + // Compute public root from the private key + scError = 
SymCryptXmssComputePublicRoot( + &pKey->params, + pKey->Seed, + pKey->params.cbHashOutput, + pKey->SkXmss, + pKey->params.cbHashOutput, + pKey->Root, + pKey->params.cbHashOutput); + + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + + pKey->keyType = SYMCRYPT_XMSSKEY_TYPE_PRIVATE; + +cleanup: + + if (scError != SYMCRYPT_NO_ERROR) + { + SymCryptWipeKnownSize(pKey->SkPrf, sizeof(pKey->SkPrf)); + SymCryptWipeKnownSize(pKey->SkXmss, sizeof(pKey->SkXmss)); + pKey->keyType = SYMCRYPT_XMSSKEY_TYPE_NONE; + } + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmsskeySetValue( + _In_reads_bytes_( cbInput ) PCBYTE pbInput, + SIZE_T cbInput, + SYMCRYPT_XMSSKEY_TYPE keyType, + UINT32 flags, + _Inout_ PSYMCRYPT_XMSS_KEY pKey ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + UINT32 uAlgId; + SIZE_T cbKey; + + SYMCRYPT_ASSERT(keyType == SYMCRYPT_XMSSKEY_TYPE_PUBLIC || keyType == SYMCRYPT_XMSSKEY_TYPE_PRIVATE); + + SYMCRYPT_CHECK_MAGIC(pKey); + + if ((flags & (~SYMCRYPT_FLAG_XMSSKEY_VERIFY_ROOT)) != 0 || + (keyType != SYMCRYPT_XMSSKEY_TYPE_PUBLIC && keyType != SYMCRYPT_XMSSKEY_TYPE_PRIVATE)) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Public root validation can only be performed for private keys + if ((flags & SYMCRYPT_FLAG_XMSSKEY_VERIFY_ROOT) != 0 && + keyType != SYMCRYPT_XMSSKEY_TYPE_PRIVATE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptXmssSizeofKeyBlobFromParams(&pKey->params, keyType, &cbKey); + + if (cbInput != cbKey) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + uAlgId = SYMCRYPT_LOAD_MSBFIRST32(pbInput); + pbInput += sizeof(UINT32); + + if (uAlgId != pKey->params.id) + { + scError = SYMCRYPT_INVALID_BLOB; + goto cleanup; + } + + // Wipe private key material + pKey->Idx = 0; + SymCryptWipeKnownSize(pKey->SkPrf, sizeof(pKey->SkPrf)); + SymCryptWipeKnownSize(pKey->SkXmss, sizeof(pKey->SkXmss)); + + pKey->keyType = keyType; + + memcpy(pKey->Root, pbInput, 
pKey->params.cbHashOutput); + pbInput += pKey->params.cbHashOutput; + + memcpy(pKey->Seed, pbInput, pKey->params.cbHashOutput); + pbInput += pKey->params.cbHashOutput; + + if (keyType == SYMCRYPT_XMSSKEY_TYPE_PRIVATE) + { + pKey->Idx = SYMCRYPT_LOAD_MSBFIRST64(pbInput); + pbInput += sizeof(UINT64); + + memcpy(pKey->SkXmss, pbInput, pKey->params.cbHashOutput); + pbInput += pKey->params.cbHashOutput; + + memcpy(pKey->SkPrf, pbInput, pKey->params.cbHashOutput); + pbInput += pKey->params.cbHashOutput; + + if ((flags & SYMCRYPT_FLAG_XMSSKEY_VERIFY_ROOT) != 0) + { + // pKey has been initialized by now + scError = SymCryptXmsskeyVerifyRoot(pKey); + + if (scError != SYMCRYPT_NO_ERROR) + { + goto cleanup; + } + } + } + +cleanup: + + if (scError != SYMCRYPT_NO_ERROR) + { + SymCryptWipeKnownSize(pKey->SkPrf, sizeof(pKey->SkPrf)); + SymCryptWipeKnownSize(pKey->SkXmss, sizeof(pKey->SkXmss)); + pKey->keyType = SYMCRYPT_XMSSKEY_TYPE_NONE; + } + + return scError; +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmsskeyGetValue( + _In_ PCSYMCRYPT_XMSS_KEY pKey, + SYMCRYPT_XMSSKEY_TYPE keyType, + UINT32 flags, + _Out_writes_bytes_( cbOutput ) PBYTE pbOutput, + SIZE_T cbOutput) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + SIZE_T cbKey; + + SYMCRYPT_ASSERT(keyType == SYMCRYPT_XMSSKEY_TYPE_PUBLIC || keyType == SYMCRYPT_XMSSKEY_TYPE_PRIVATE); + + SYMCRYPT_CHECK_MAGIC(pKey); + + if (flags != 0 || + (keyType != SYMCRYPT_XMSSKEY_TYPE_PUBLIC && keyType != SYMCRYPT_XMSSKEY_TYPE_PRIVATE) || + pKey->keyType == SYMCRYPT_XMSSKEY_TYPE_NONE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // Cannot export private key from a public key object + if (keyType == SYMCRYPT_XMSSKEY_TYPE_PRIVATE && pKey->keyType != SYMCRYPT_XMSSKEY_TYPE_PRIVATE) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + SymCryptXmssSizeofKeyBlobFromParams(&pKey->params, keyType, &cbKey); + + if (cbOutput != cbKey) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + // + // 
Public Key + // + + // Alg Id + SYMCRYPT_STORE_MSBFIRST32(pbOutput, pKey->params.id); + pbOutput += sizeof(UINT32); + + // Root + memcpy(pbOutput, pKey->Root, pKey->params.cbHashOutput); + pbOutput += pKey->params.cbHashOutput; + + // Seed + memcpy(pbOutput, pKey->Seed, pKey->params.cbHashOutput); + pbOutput += pKey->params.cbHashOutput; + + if (keyType == SYMCRYPT_XMSSKEY_TYPE_PRIVATE) + { + // + // Private Key + // + + // Idx + SYMCRYPT_STORE_MSBFIRST64(pbOutput, pKey->Idx); + pbOutput += sizeof(pKey->Idx); + + // SK_XMSS + memcpy(pbOutput, pKey->SkXmss, pKey->params.cbHashOutput); + pbOutput += pKey->params.cbHashOutput; + + // SK_PRF + memcpy(pbOutput, pKey->SkPrf, pKey->params.cbHashOutput); + pbOutput += pKey->params.cbHashOutput; + } + +cleanup: + + return scError; +} + + +UINT32 +SYMCRYPT_CALL +SymCryptHbsGetDigit( + UINT32 width, + _In_ PCBYTE pbBuffer, + SIZE_T cbBuffer, + UINT32 index ) +{ + UNREFERENCED_PARAMETER(cbBuffer); + + SYMCRYPT_ASSERT(width == 1 || width == 2 || width == 4 || width == 8); + SYMCRYPT_ASSERT(index < ((cbBuffer * 8) / width)); + + UINT32 digitsPerByte = 8 / width; + + BYTE value = pbBuffer[index / digitsPerByte]; + + value >>= width * (digitsPerByte - 1 - (index % digitsPerByte)); + + value &= (1 << width) - 1; + + return value; +} + + +VOID +SYMCRYPT_CALL +SymCryptXmssTreeRootFromAuthenticationPath( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _Inout_ XMSS_ADRS *adrs, + UINT32 uLeaf, + _In_reads_bytes_( pParams->cbHashOutput ) + PCBYTE pbStartingNode, + _In_reads_bytes_( pParams->cbHashOutput * pParams->nLayerHeight ) + PCBYTE pbAuthNodes, + _In_reads_bytes_( pParams->cbHashOutput ) + PCBYTE pbSeed, + _Out_writes_bytes_( pParams->cbHashOutput ) + PBYTE pbOutput ) +{ + BYTE node[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + BYTE tmp[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + UINT32 uNodeIndex = uLeaf; + + memcpy(node, pbStartingNode, pParams->cbHashOutput); + + SymCryptXmssSetAdrsType(adrs, XMSS_ADRS_TYPE_HASH_TREE); + 
SYMCRYPT_STORE_MSBFIRST32(adrs->u.hashtree.en32Index, uNodeIndex); + + for (UINT32 i = 0; i < pParams->nLayerHeight; i++) + { + SYMCRYPT_STORE_MSBFIRST32(adrs->u.hashtree.en32Height, i); + + if ( ((uLeaf >> i) & 1) == 0 ) + { + uNodeIndex = uNodeIndex / 2; + SYMCRYPT_STORE_MSBFIRST32(adrs->u.hashtree.en32Index, uNodeIndex); + SymCryptXmssRandHash(pParams, adrs, pbSeed, node, &pbAuthNodes[pParams->cbHashOutput * i], tmp); + } + else + { + uNodeIndex = (uNodeIndex - 1) / 2; + SYMCRYPT_STORE_MSBFIRST32(adrs->u.hashtree.en32Index, uNodeIndex); + SymCryptXmssRandHash(pParams, adrs, pbSeed, &pbAuthNodes[pParams->cbHashOutput * i], node, tmp); + } + + memcpy(node, tmp, pParams->cbHashOutput); + } + + memcpy(pbOutput, node, pParams->cbHashOutput); +} + +VOID +SYMCRYPT_CALL +SymCryptXmssRandomizedHash( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + UINT64 Idx, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbRandomizer, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbRoot, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbMsg, + SIZE_T cbMsg, + _Out_writes_bytes_( pParams->cbHashOutput ) PBYTE pbOutput ) +{ + SYMCRYPT_HASH_STATE state; + BYTE idxBuf[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + + SymCryptWipe(idxBuf, pParams->cbHashOutput); + SYMCRYPT_STORE_MSBFIRST64(&idxBuf[pParams->cbHashOutput - sizeof(Idx)], Idx); + + SymCryptXmssPrfInit(pParams->hash, SYMCRYPT_XMSS_H_MSG, pParams->cbPrefix, &state); + SymCryptHashAppend(pParams->hash, &state, pbRandomizer, pParams->cbHashOutput); + SymCryptHashAppend(pParams->hash, &state, pbRoot, pParams->cbHashOutput); + SymCryptHashAppend(pParams->hash, &state, idxBuf, pParams->cbHashOutput); + SymCryptHashAppend(pParams->hash, &state, pbMsg, cbMsg); + SymCryptHashResult(pParams->hash, &state, pbOutput, pParams->cbHashOutput); +} + + +VOID +SYMCRYPT_CALL +SymCryptXmssWotspPublickeyFromSignature( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _Inout_ XMSS_ADRS *adrs, + UINT32 idx, + _In_reads_bytes_( pParams->cbHashOutput ) + PCBYTE pbMsg, + 
_In_reads_bytes_( pParams->cbHashOutput ) + PCBYTE pbSeed, + _In_reads_bytes_( pParams->cbHashOutput * pParams->len ) + PCBYTE pbSignature, + _Out_writes_bytes_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch, + _Out_writes_bytes_( pParams->cbHashOutput ) + PBYTE pbOutput ) +{ + + UINT32 digit; + UINT32 checksum = 0; + BYTE en32Checksum[4]; + const UINT32 maxChainIndex = (1 << pParams->nWinternitzWidth) - 1; + PSYMCRYPT_INCREMENTAL_TREEHASH pIncHash = NULL; + PSYMCRYPT_TREEHASH_NODE pNode = NULL; + SYMCRYPT_XMSS_INCREMENTAL_TREEHASH_CONTEXT ctxIncHash; + + SymCryptXmssSetAdrsType(adrs, XMSS_ADRS_TYPE_LTREE); + SYMCRYPT_STORE_MSBFIRST32(adrs->u.ltree.en32Leaf, idx); + + ctxIncHash.adrs = *adrs; + ctxIncHash.pParams = pParams; + ctxIncHash.pbSeed = pbSeed; + + pIncHash = SymCryptHbsIncrementalTreehashInit( + pParams->len, + pbScratch, + cbScratch, + pParams->cbHashOutput, + SymCryptXmssLtreeNodeCompress, + &ctxIncHash); + + for (UINT32 i = 0; i < pParams->len; i++) + { + if (i < pParams->len1) + { + digit = SymCryptHbsGetDigit(pParams->nWinternitzWidth, pbMsg, pParams->cbHashOutput, i); + + checksum += maxChainIndex - digit; + } + else + { + if (i == pParams->len1) + { + checksum <<= pParams->nLeftShift32; + SYMCRYPT_STORE_MSBFIRST32(en32Checksum, checksum); + } + + digit = SymCryptHbsGetDigit(pParams->nWinternitzWidth, en32Checksum, sizeof(en32Checksum), i - pParams->len1); + } + + pNode = SymCryptHbsIncrementalTreehashAllocNode(pIncHash, i); + + SymCryptXmssSetAdrsType(adrs, XMSS_ADRS_TYPE_OTS); + SYMCRYPT_STORE_MSBFIRST32(adrs->u.ots.en32Leaf, idx); + SYMCRYPT_STORE_MSBFIRST32(adrs->u.ots.en32Chain, i); + + SymCryptXmssChain( + pParams, + &pbSignature[pParams->cbHashOutput * i], + digit, + maxChainIndex - digit, + pbSeed, + adrs, + pNode->value); + + SymCryptHbsIncrementalTreehashProcess(pIncHash); + } + + pNode = SymCryptHbsIncrementalTreehashFinalize(pIncHash); + + memcpy(pbOutput, pNode->value, pParams->cbHashOutput); +} + + +VOID +SYMCRYPT_CALL 
+SymCryptXmssTreeRootFromSignature( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _Inout_ XMSS_ADRS *adrs, + _In_reads_bytes_( pParams->cbHashOutput ) + PCBYTE pbSeed, + _In_reads_bytes_( pParams->cbHashOutput ) + PCBYTE pbHash, + UINT32 uLeaf, + _In_reads_bytes_( pParams->cbHashOutput * pParams->len ) + PCBYTE pbWotspSig, + _In_reads_bytes_( pParams->cbHashOutput* pParams->nLayerHeight ) + PCBYTE pbAuthNodes, + _Out_writes_bytes_( pParams->cbHashOutput ) + PBYTE pbOutput, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + BYTE WotspPublickey[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + + SymCryptXmssWotspPublickeyFromSignature( + pParams, + adrs, + uLeaf, + pbHash, + pbSeed, + pbWotspSig, + pbScratch, + cbScratch, + WotspPublickey); + + SymCryptXmssTreeRootFromAuthenticationPath( + pParams, + adrs, + uLeaf, + WotspPublickey, + pbAuthNodes, + pbSeed, + pbOutput); +} + +SIZE_T +SYMCRYPT_CALL +SymCryptXmssSizeofWotspSignature(_In_ PCSYMCRYPT_XMSS_PARAMS pParams) +{ + // WOTSP signature size is len = len1 + len2 many hash outputs + return pParams->cbHashOutput * pParams->len; +} + +SIZE_T +SYMCRYPT_CALL +SymCryptXmssSizeofAuthNodes( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams) +{ + // size of authentication nodes for single tree + return pParams->cbHashOutput * pParams->nLayerHeight; +} + +UINT64 +SYMCRYPT_CALL +SymCryptXmssSignatureGetIdx( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _In_ PCBYTE pbSig ) +{ + UINT64 Idx = 0; + + for (UINT8 i = 0; i < pParams->cbIdx; i++) + { + Idx <<= 8; + Idx |= (UINT64)pbSig[i]; + } + + return Idx; +} + +PBYTE +SYMCRYPT_CALL +SymCryptXmssSignatureGetRandomness( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _In_ PCBYTE pbSig ) +{ + PBYTE pb = (PBYTE)pbSig; + + // randomness comes after idx + pb += pParams->cbIdx; + + return pb; +} + +PBYTE +SYMCRYPT_CALL +SymCryptXmssSignatureGetWotspSig( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _In_ PCBYTE pbSig, + UINT32 uLayer ) +{ + PBYTE pb = SymCryptXmssSignatureGetRandomness(pParams, pbSig); 
+ + // skip randomness + pb += pParams->cbHashOutput; + + // each layer contains WOTSP signature and AuthNodes + pb += uLayer * (SymCryptXmssSizeofWotspSignature(pParams) + SymCryptXmssSizeofAuthNodes(pParams)); + + return pb; +} + +PBYTE +SYMCRYPT_CALL +SymCryptXmssSignatureGetAuthNodes( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _In_ PCBYTE pbSig, + UINT32 uLayer ) +{ + PBYTE pb = SymCryptXmssSignatureGetWotspSig(pParams, pbSig, uLayer); + + // AuthNodes follow WOTSP signature + pb += SymCryptXmssSizeofWotspSignature(pParams); + + return pb; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssVerifyInternal( + _Inout_ PSYMCRYPT_XMSS_KEY pKey, + _In_reads_bytes_( cbMessage ) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + PCSYMCRYPT_XMSS_PARAMS pParams = &pKey->params; + BYTE RandomizedHash[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + BYTE ComputedRoot[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + XMSS_ADRS adrs; + UINT32 uLayer; + UINT64 uTree; + UINT32 uLeaf; + const UINT64 LeafMask = (1ULL << pParams->nLayerHeight) - 1; + + SYMCRYPT_CHECK_MAGIC(pKey); + + SYMCRYPT_ASSERT(pParams->nLayerHeight < 32); // Ensure nLeaves fits in 32 bits + + if (flags != 0 || + pbSignature == NULL || + cbSignature != SymCryptXmssSizeofSignatureFromParams(pParams) || + pKey->keyType == SYMCRYPT_XMSSKEY_TYPE_NONE ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbScratch += SymCryptHbsSizeofScratchBytesForIncrementalTreehash(pParams->cbHashOutput, pParams->len); + cbScratch += SymCryptHbsSizeofScratchBytesForIncrementalTreehash(pParams->cbHashOutput, 1ULL << pParams->nLayerHeight); + + pbScratch = (PBYTE)SymCryptCallbackAlloc(cbScratch); + + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + PBYTE pbRandomness = SymCryptXmssSignatureGetRandomness(pParams, 
pbSignature); + UINT64 Idx = SymCryptXmssSignatureGetIdx(pParams, pbSignature); + + SymCryptXmssRandomizedHash( + pParams, + Idx, + pbRandomness, + pKey->Root, + pbMessage, + cbMessage, + RandomizedHash); + + SymCryptWipeKnownSize(&adrs, sizeof(XMSS_ADRS)); + + for (uLayer = 0; uLayer < pParams->nLayers; uLayer++) + { + uTree = Idx >> pParams->nLayerHeight; + uLeaf = (UINT32)(Idx & LeafMask); + + SYMCRYPT_STORE_MSBFIRST32(adrs.en32Layer, uLayer); + SYMCRYPT_STORE_MSBFIRST64(adrs.en64Tree, uTree); + SymCryptXmssTreeRootFromSignature( + pParams, + &adrs, + pKey->Seed, + uLayer == 0 ? RandomizedHash : ComputedRoot, + uLeaf, + SymCryptXmssSignatureGetWotspSig(pParams, pbSignature, uLayer), + SymCryptXmssSignatureGetAuthNodes(pParams, pbSignature, uLayer), + ComputedRoot, + pbScratch, + cbScratch); + + Idx >>= pParams->nLayerHeight; + } + + if (!SymCryptEqual(ComputedRoot, pKey->Root, pParams->cbHashOutput)) + { + scError = SYMCRYPT_SIGNATURE_VERIFICATION_FAILURE; + goto cleanup; + } + +cleanup: + + if (pbScratch) + { + SymCryptWipe(pbScratch, cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssVerify( + _Inout_ PSYMCRYPT_XMSS_KEY pKey, + _In_reads_bytes_( cbMessage ) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _In_reads_bytes_( cbSignature ) PCBYTE pbSignature, + SIZE_T cbSignature ) +{ + SYMCRYPT_RUN_SELFTEST_ONCE( + SymCryptXmssSelftest, + SYMCRYPT_SELFTEST_ALGORITHM_XMSS); + + return SymCryptXmssVerifyInternal( + pKey, + pbMessage, + cbMessage, + flags, + pbSignature, + cbSignature); +} + +VOID +SYMCRYPT_CALL +SymCryptXmssWotspSign( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _Inout_ XMSS_ADRS* adrs, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbInput, + UINT32 uLeaf, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSeed, + _In_reads_bytes_( pParams->cbHashOutput ) PCBYTE pbSkXmss, + _Out_writes_bytes_( pParams->cbHashOutput * pParams->len ) PBYTE pbOutput ) +{ + BYTE 
node[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + UINT32 nChecksum = 0; + BYTE en32Checksum[sizeof(UINT32)]; + UINT32 digit; + const UINT32 maxChainIndex = (1UL << pParams->nWinternitzWidth) - 1; + + + SymCryptXmssSetAdrsType(adrs, XMSS_ADRS_TYPE_OTS); + SYMCRYPT_STORE_MSBFIRST32(adrs->u.ots.en32Leaf, uLeaf); + + for (UINT32 i = 0; i < pParams->len; i++) + { + SYMCRYPT_STORE_MSBFIRST32(adrs->u.ots.en32Chain, i); + SymCryptXmssCreateWotspSecret( + pParams, + pbSkXmss, + pbSeed, + adrs, + node); + + if (i < pParams->len1) + { + digit = SymCryptHbsGetDigit(pParams->nWinternitzWidth, pbInput, pParams->cbHashOutput, i); + + nChecksum += maxChainIndex - digit; + } + else + { + if (i == pParams->len1) + { + nChecksum <<= pParams->nLeftShift32; + SYMCRYPT_STORE_MSBFIRST32(en32Checksum, nChecksum); + } + + digit = SymCryptHbsGetDigit(pParams->nWinternitzWidth, en32Checksum, sizeof(en32Checksum), i - pParams->len1); + } + + SymCryptXmssChain( + pParams, + node, + 0, + digit, + pbSeed, + adrs, + &pbOutput[i * pParams->cbHashOutput]); + } + + SymCryptWipeKnownSize(node, sizeof(node)); +} + + +VOID +SYMCRYPT_CALL +SymCryptXmssTreeSignHash( + _In_ PCSYMCRYPT_XMSS_PARAMS pParams, + _Inout_ XMSS_ADRS *adrs, + _In_reads_bytes_( pParams->cbHashOutput ) + PCBYTE pbSkXmss, + _In_reads_bytes_( pParams->cbHashOutput ) + PCBYTE pbSeed, + _In_reads_bytes_( pParams->cbHashOutput ) + PCBYTE pbHash, + UINT32 Idx, + _Out_writes_bytes_( pParams->cbHashOutput * pParams->len ) + PBYTE pbWotspSig, + _Out_writes_bytes_( pParams->cbHashOutput * pParams->nLayerHeight ) + PBYTE pbAuthNodes, + _Out_writes_bytes_opt_( pParams->cbHashOutput ) + PBYTE pbRoot, + _Out_writes_bytes_opt_( cbScratch ) + PBYTE pbScratch, + SIZE_T cbScratch ) +{ + BYTE WotspPublicKey[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + + SymCryptXmssWotspSign( + pParams, + adrs, + pbHash, + Idx, + pbSeed, + pbSkXmss, + pbWotspSig ); + + // Generate authentication path + for (UINT32 h = 0; h < pParams->nLayerHeight; h++) + { + UINT32 uLeaf = ((Idx >> h) ^ 
1UL) << h; + SymCryptXmssComputeSubtreeRoot( + pParams, + adrs, + pbSkXmss, + pbSeed, + uLeaf, + h, + pbScratch, + cbScratch, + &pbAuthNodes[h * pParams->cbHashOutput]); + } + + // + // Calculate tree root if requested by the caller + // + // This is used to return the tree root to be signed with the upper + // layer in XMSS^MT. + if (pbRoot) + { + SymCryptXmssCreateWotspPublickey( + pParams, + adrs, + Idx, + pbSkXmss, + pbSeed, + pbScratch, + cbScratch, + WotspPublicKey); + + SymCryptXmssTreeRootFromAuthenticationPath( + pParams, + adrs, + Idx, + WotspPublicKey, + pbAuthNodes, + pbSeed, + pbRoot); + } +} + +// +// Compute randomness for randomized hashing +// +VOID +SYMCRYPT_CALL +SymCryptXmssComputeRandomness( + _In_ PCSYMCRYPT_XMSS_KEY pKey, + UINT64 Idx, + _Out_writes_bytes_( pKey->params.cbHashOutput ) PBYTE pbRandomness ) +{ + BYTE IdxBuffer[32]; + + SymCryptWipeKnownSize(IdxBuffer, sizeof(IdxBuffer)); + SYMCRYPT_STORE_MSBFIRST64(IdxBuffer + sizeof(IdxBuffer) - sizeof(Idx), Idx); + + SymCryptXmssPrf( + &pKey->params, + SYMCRYPT_XMSS_PRF, + pKey->SkPrf, + pKey->params.cbHashOutput, + IdxBuffer, + sizeof(IdxBuffer), + pbRandomness); +} + + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXmssSign( + _Inout_ PSYMCRYPT_XMSS_KEY pKey, + _In_reads_bytes_( cbMessage ) PCBYTE pbMessage, + SIZE_T cbMessage, + UINT32 flags, + _Out_writes_bytes_( cbSignature ) PBYTE pbSignature, + SIZE_T cbSignature ) +{ + SYMCRYPT_ERROR scError = SYMCRYPT_NO_ERROR; + PBYTE pbScratch = NULL; + SIZE_T cbScratch = 0; + PSYMCRYPT_XMSS_PARAMS pParams = &pKey->params; + UINT64 Idx; + BYTE en64Idx[sizeof(UINT64)]; + BYTE Randomness[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + BYTE RandomizedHash[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + BYTE TreeRoot[SYMCRYPT_HASH_MAX_RESULT_SIZE]; + XMSS_ADRS adrs; + UINT32 uLayer; + UINT64 uTree; + UINT32 uLeaf; + const UINT64 LeafMask = (1ULL << pParams->nLayerHeight) - 1; + + SYMCRYPT_CHECK_MAGIC(pKey); + + SYMCRYPT_ASSERT(pParams->nLayerHeight < 32); // Ensure nLeaves fits in 32 
bits + + if (flags != 0 || + pbSignature == NULL || + cbSignature != SymCryptXmssSizeofSignatureFromParams(pParams) || + pKey->keyType != SYMCRYPT_XMSSKEY_TYPE_PRIVATE ) + { + scError = SYMCRYPT_INVALID_ARGUMENT; + goto cleanup; + } + + cbScratch += SymCryptHbsSizeofScratchBytesForIncrementalTreehash(pParams->cbHashOutput, pParams->len); // Ltree hashing + cbScratch += SymCryptHbsSizeofScratchBytesForIncrementalTreehash(pParams->cbHashOutput, 1ULL << pParams->nLayerHeight); // Merkle-tree hashing + + pbScratch = (PBYTE)SymCryptCallbackAlloc(cbScratch); + + if (pbScratch == NULL) + { + scError = SYMCRYPT_MEMORY_ALLOCATION_FAILURE; + goto cleanup; + } + + Idx = SYMCRYPT_ATOMIC_ADD64_POST_RELAXED(&pKey->Idx, 1) - 1; + if (Idx >= (1ULL << pParams->nTotalTreeHeight)) + { + // Set Idx to first unusable value + pKey->Idx = (1ULL << pParams->nTotalTreeHeight); + + scError = SYMCRYPT_HBS_NO_OTS_KEYS_LEFT; + goto cleanup; + } + + SYMCRYPT_STORE_MSBFIRST64(en64Idx, Idx); + memcpy(pbSignature, &en64Idx[sizeof(en64Idx) - pParams->cbIdx], pParams->cbIdx); + + SymCryptXmssComputeRandomness(pKey, Idx, Randomness); + memcpy(SymCryptXmssSignatureGetRandomness(pParams, pbSignature), Randomness, pKey->params.cbHashOutput); + + SymCryptXmssRandomizedHash(&pKey->params, Idx, Randomness, pKey->Root, pbMessage, cbMessage, RandomizedHash); + + SymCryptWipeKnownSize(&adrs, sizeof(XMSS_ADRS)); + + for (uLayer = 0; uLayer < pParams->nLayers; uLayer++) + { + uTree = Idx >> pParams->nLayerHeight; + uLeaf = (UINT32)(Idx & LeafMask); + + SYMCRYPT_STORE_MSBFIRST32(adrs.en32Layer, uLayer); + SYMCRYPT_STORE_MSBFIRST64(adrs.en64Tree, uTree); + + SymCryptXmssTreeSignHash( + &pKey->params, + &adrs, + pKey->SkXmss, + pKey->Seed, + uLayer == 0 ? RandomizedHash : TreeRoot, + uLeaf, + SymCryptXmssSignatureGetWotspSig(pParams, pbSignature, uLayer), + SymCryptXmssSignatureGetAuthNodes(pParams, pbSignature, uLayer), + uLayer == (UINT32)(pParams->nLayers - 1) ? 
NULL : TreeRoot, // No need to compute the root for the top layer tree + pbScratch, + cbScratch); + + Idx >>= pParams->nLayerHeight; + } + + if (scError != SYMCRYPT_NO_ERROR) + { + SymCryptWipe(pbSignature, cbSignature); + goto cleanup; + } + +cleanup: + + if (pbScratch) + { + SymCryptWipe(pbScratch, cbScratch); + SymCryptCallbackFree(pbScratch); + } + + return scError; +} diff --git a/libs/symcrypt/lib/xtsaes.c b/libs/symcrypt/lib/xtsaes.c new file mode 100644 index 00000000000..45e842702e7 --- /dev/null +++ b/libs/symcrypt/lib/xtsaes.c @@ -0,0 +1,727 @@ +// +// xtsaes.c code for XTS-AES implementation +// +// Copyright (c) Microsoft Corporation. Licensed under the MIT license. +// + +#include "precomp.h" + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXtsAesExpandKey( + _Out_ PSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey ) +{ + SYMCRYPT_ERROR scError; + SIZE_T halfKeySize = cbKey / 2; + + scError = SymCryptAesExpandKey( &pExpandedKey->key1, pbKey, halfKeySize ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + + // + // Pass the 'rest' of the key to the second one. This catches errors such as + // an attempt to pass a 33 byte key. + // halfKeySize = 16, which is valid, but this expansion gets a 17-byte key which will fail. + // Key2 is only used for tweak encryption, so we can use the EncryptOnly key expansion. 
+ // + scError = SymCryptAesExpandKeyEncryptOnly( &pExpandedKey->key2, pbKey + halfKeySize, cbKey - halfKeySize ); + if( scError != SYMCRYPT_NO_ERROR ) + { + goto cleanup; + } + +cleanup: + + return scError; +} + +SYMCRYPT_ERROR +SYMCRYPT_CALL +SymCryptXtsAesExpandKeyEx( + _Out_ PSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + _In_reads_( cbKey ) PCBYTE pbKey, + SIZE_T cbKey, + UINT32 flags ) +{ + if( ( flags & SYMCRYPT_FLAG_KEY_NO_FIPS ) == 0 ) + { + // FIPS IG C.I enforces that the two AES keys internally used in XTS-AES are non-equal + if( cbKey > 64 ) + { + return SYMCRYPT_WRONG_KEY_SIZE; + } + if( SymCryptEqual( pbKey, pbKey+(cbKey/2), (cbKey/2) ) ) + { + return SYMCRYPT_FIPS_FAILURE; + } + } + + return SymCryptXtsAesExpandKey( pExpandedKey, pbKey, cbKey ); +} + + +VOID +SYMCRYPT_CALL +SymCryptXtsAesKeyCopy( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pSrc, + _Out_ PSYMCRYPT_XTS_AES_EXPANDED_KEY pDst ) +{ + SymCryptAesKeyCopy( &pSrc->key1, &pDst->key1 ); + SymCryptAesKeyCopy( &pSrc->key2, &pDst->key2 ); +} + +#define N_PARALLEL_TWEAKS 16 + +#define SYMCRYPT_XTS_AES_LOCALSCRATCH_DEFN \ + SYMCRYPT_ALIGN BYTE localScratch[N_PARALLEL_TWEAKS * SYMCRYPT_AES_BLOCK_SIZE]; + +#define SYMCRYPT_AesEcbEncryptXxx SymCryptAesEcbEncryptC + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesEncryptInternalC +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesEncryptDataUnitC( &pExpandedKey->key1, &tweakBuf[i], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesDecryptInternalC +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesDecryptDataUnitC( &pExpandedKey->key1, &tweakBuf[i], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#undef SYMCRYPT_AesEcbEncryptXxx + + +#if SYMCRYPT_CPU_AMD64 | SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_ARM +#define SYMCRYPT_AesEcbEncryptXxx SymCryptAesEcbEncryptAsm + 
+#define SYMCRYPT_XtsAesXxx SymCryptXtsAesEncryptInternalAsm +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesEncryptDataUnitAsm( &pExpandedKey->key1, &tweakBuf[i], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesDecryptInternalAsm +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesDecryptDataUnitAsm( &pExpandedKey->key1, &tweakBuf[i], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#undef SYMCRYPT_AesEcbEncryptXxx +#endif + +#if SYMCRYPT_CPU_ARM64 +#define SYMCRYPT_AesEcbEncryptXxx SymCryptAesEcbEncryptNeon + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesEncryptInternalNeon +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesEncryptDataUnitNeon( &pExpandedKey->key1, &tweakBuf[i], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesDecryptInternalNeon +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesDecryptDataUnitNeon( &pExpandedKey->key1, &tweakBuf[i], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#undef SYMCRYPT_AesEcbEncryptXxx +#endif + +#undef SYMCRYPT_XTS_AES_LOCALSCRATCH_DEFN + + +#if SYMCRYPT_CPU_X86 | SYMCRYPT_CPU_AMD64 + +#define SYMCRYPT_XTS_AES_LOCALSCRATCH_DEFN \ + /* Defining localScratch as a buffer of __m128is ensures there is required 16B alignment on x86 */ \ + __m128i localScratch[ N_PARALLEL_TWEAKS + 16 ]; +#define SYMCRYPT_AesEcbEncryptXxx SymCryptAesEcbEncryptXmm + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesEncryptInternalXmm +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesEncryptDataUnitXmm( &pExpandedKey->key1, &tweakBuf[i], (PBYTE) &localScratch[N_PARALLEL_TWEAKS], pbSrc, pbDst, cbDataUnit ); +#include 
"xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesDecryptInternalXmm +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesDecryptDataUnitXmm( &pExpandedKey->key1, &tweakBuf[i], (PBYTE) &localScratch[N_PARALLEL_TWEAKS], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesEncryptInternalYmm +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesEncryptDataUnitYmm_2048( &pExpandedKey->key1, &tweakBuf[i], (PBYTE) &localScratch[N_PARALLEL_TWEAKS], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesDecryptInternalYmm +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesDecryptDataUnitYmm_2048( &pExpandedKey->key1, &tweakBuf[i], (PBYTE) &localScratch[N_PARALLEL_TWEAKS], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#if 0 //do not compile Zmm code for now + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesEncryptInternalZmm +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesEncryptDataUnitZmm_2048( &pExpandedKey->key1, &tweakBuf[i], (PBYTE) &localScratch[N_PARALLEL_TWEAKS], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#define SYMCRYPT_XtsAesXxx SymCryptXtsAesDecryptInternalZmm +#define SYMCRYPT_XTSAESDATAUNIT_INVOKE \ + SymCryptXtsAesDecryptDataUnitZmm_2048( &pExpandedKey->key1, &tweakBuf[i], (PBYTE) &localScratch[N_PARALLEL_TWEAKS], pbSrc, pbDst, cbDataUnit ); +#include "xtsaes_pattern.c" +#undef SYMCRYPT_XtsAesXxx +#undef SYMCRYPT_XTSAESDATAUNIT_INVOKE + +#endif + +#undef SYMCRYPT_XTS_AES_LOCALSCRATCH_DEFN +#undef SYMCRYPT_AesEcbEncryptXxx + +#endif + +VOID +SYMCRYPT_CALL 
+SymCryptXtsAesEncryptInternal( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbTweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + BOOLEAN bOverflow ) +{ +#if SYMCRYPT_CPU_AMD64 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + /* if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_VAES_512_CODE ) ) { + SymCryptXtsAesEncryptInternalZmm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } else */ + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_VAES_256_CODE ) && + SymCryptSaveYmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptXtsAesEncryptInternalYmm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + SymCryptRestoreYmm( &SaveData ); + } else if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) { + SymCryptXtsAesEncryptInternalXmm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } else { + SymCryptXtsAesEncryptInternalAsm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptXtsAesEncryptInternalXmm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptXtsAesEncryptInternalAsm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } +#elif SYMCRYPT_CPU_ARM + SymCryptXtsAesEncryptInternalAsm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptXtsAesEncryptInternalNeon( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } else { + SymCryptXtsAesEncryptInternalC( 
pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } +#else + SymCryptXtsAesEncryptInternalC( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncrypt( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + UINT64 tweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SYMCRYPT_ALIGN BYTE fullTweak[SYMCRYPT_AES_BLOCK_SIZE]; + + SYMCRYPT_ASSERT( cbData % cbDataUnit == 0 ); + + if( cbDataUnit < SYMCRYPT_AES_BLOCK_SIZE ) + { + // Invalid data unit size + // Return early to avoid repeated checks deeper in the code + return; + } + + SYMCRYPT_STORE_LSBFIRST64(&fullTweak[0], tweak); + SYMCRYPT_STORE_LSBFIRST64(&fullTweak[8], 0); + + SymCryptXtsAesEncryptInternal( pExpandedKey, cbDataUnit, &fullTweak[0], pbSrc, pbDst, cbData, FALSE ); +} + +VOID +SYMCRYPT_CALL +SymCryptXtsAesEncryptWith128bTweak( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbTweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + if( cbDataUnit < SYMCRYPT_AES_BLOCK_SIZE ) + { + // Invalid data unit size + // Return early to avoid repeated checks deeper in the code + return; + } + + SymCryptXtsAesEncryptInternal( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, TRUE ); +} + + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptInternal( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbTweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData, + BOOLEAN bOverflow ) +{ +#if SYMCRYPT_CPU_AMD64 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + /* if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_VAES_512_CODE ) ) { + SymCryptXtsAesDecryptInternalZmm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow 
); + } else */ + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_VAES_256_CODE ) && + SymCryptSaveYmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptXtsAesDecryptInternalYmm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + SymCryptRestoreYmm( &SaveData ); + } else if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) ) { + SymCryptXtsAesDecryptInternalXmm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } else { + SymCryptXtsAesDecryptInternalAsm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } +#elif SYMCRYPT_CPU_X86 + SYMCRYPT_EXTENDED_SAVE_DATA SaveData; + + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURES_FOR_AESNI_CODE ) && + SymCryptSaveXmm( &SaveData ) == SYMCRYPT_NO_ERROR ) + { + SymCryptXtsAesDecryptInternalXmm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + SymCryptRestoreXmm( &SaveData ); + } else { + SymCryptXtsAesDecryptInternalAsm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } +#elif SYMCRYPT_CPU_ARM + SymCryptXtsAesDecryptInternalAsm( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); +#elif SYMCRYPT_CPU_ARM64 + if( SYMCRYPT_CPU_FEATURES_PRESENT( SYMCRYPT_CPU_FEATURE_NEON_AES ) ) + { + SymCryptXtsAesDecryptInternalNeon( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } else { + SymCryptXtsAesDecryptInternalC( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); + } +#else + SymCryptXtsAesDecryptInternalC( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, bOverflow ); +#endif +} + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecrypt( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + UINT64 tweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + SYMCRYPT_ALIGN BYTE fullTweak[SYMCRYPT_AES_BLOCK_SIZE]; + + SYMCRYPT_ASSERT( cbData % cbDataUnit == 0 ); + + 
if( cbDataUnit < SYMCRYPT_AES_BLOCK_SIZE ) + { + // Invalid data unit size + // Return early to avoid repeated checks deeper in the code + return; + } + + SYMCRYPT_STORE_LSBFIRST64(&fullTweak[0], tweak); + SYMCRYPT_STORE_LSBFIRST64(&fullTweak[8], 0); + + SymCryptXtsAesDecryptInternal( pExpandedKey, cbDataUnit, &fullTweak[0], pbSrc, pbDst, cbData, FALSE ); +} + +VOID +SYMCRYPT_CALL +SymCryptXtsAesDecryptWith128bTweak( + _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey, + SIZE_T cbDataUnit, + _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbTweak, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + if( cbDataUnit < SYMCRYPT_AES_BLOCK_SIZE ) + { + // Invalid data unit size + // Return early to avoid repeated checks deeper in the code + return; + } + + SymCryptXtsAesDecryptInternal( pExpandedKey, cbDataUnit, pbTweak, pbSrc, pbDst, cbData, TRUE ); +} + +VOID +SYMCRYPT_CALL +SymCryptXtsUpdateTweak( + _Inout_updates_(SYMCRYPT_AES_BLOCK_SIZE) PBYTE buf ) +{ +/* + UINT32 b0 = LOAD_LSBFIRST32( buf ); + UINT32 b1 = LOAD_LSBFIRST32( buf + 4 ); + UINT32 b2 = LOAD_LSBFIRST32( buf + 8 ); + UINT32 b3 = LOAD_LSBFIRST32( buf + 12 ); + UINT32 msbit = b3 >> 31; + + // + // The STORE_* macros re-evaluate their arguments sometimes, so we + // keep all computations in local variables. 
+ // + UINT32 r0 = (b0 << 1) ^ (135 * msbit); + UINT32 r1 = (b1 << 1) | (b0 >> 31); + UINT32 r2 = (b2 << 1) | (b1 >> 31); + UINT32 r3 = (b3 << 1) | (b2 >> 31); + + STORE_LSBFIRST32( buf , r0 ); + STORE_LSBFIRST32( buf + 4, r1 ); + STORE_LSBFIRST32( buf + 8, r2 ); + STORE_LSBFIRST32( buf + 12, r3 ); +*/ + UINT64 b0 = SYMCRYPT_LOAD_LSBFIRST64( buf ); + UINT64 b1 = SYMCRYPT_LOAD_LSBFIRST64( buf + 8 ); + + /* + UINT32 msbit = (UINT32)(b1 >> 63); + //UINT32 feedback = 135 * msbit; + UINT32 feedback = (msbit << 7) + (msbit << 3) - msbit; + */ + UINT32 feedback = (((INT64)b1) >> 63) & 135; + + UINT64 r0 = (b0 << 1) ^ feedback; + UINT64 r1 = (b1 << 1) | (b0 >> 63); + + SYMCRYPT_STORE_LSBFIRST64( buf , r0 ); + SYMCRYPT_STORE_LSBFIRST64( buf + 8, r1 ); +} + +VOID +SYMCRYPT_CALL +SymCryptXtsEncryptDataUnit( + _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher, + _In_ PCVOID pExpandedKey, + _Inout_updates_( pBlockCipher->blockSize ) PBYTE pbTweakBlock, + _In_reads_( cbData ) PCBYTE pbSrc, + _Out_writes_( cbData ) PBYTE pbDst, + SIZE_T cbData ) +{ + BYTE buf[2*SYMCRYPT_AES_BLOCK_SIZE]; + + while( cbData >= 2*SYMCRYPT_AES_BLOCK_SIZE ) + { + SymCryptXorBytes( pbTweakBlock, pbSrc, buf, SYMCRYPT_AES_BLOCK_SIZE ); + (*pBlockCipher->encryptFunc)( pExpandedKey, buf, buf ); + SymCryptXorBytes( pbTweakBlock, buf, pbDst, SYMCRYPT_AES_BLOCK_SIZE ); + + SYMCRYPT_ASSERT( pBlockCipher->blockSize == SYMCRYPT_AES_BLOCK_SIZE ); + SymCryptXtsUpdateTweak( pbTweakBlock ); + + pbSrc += SYMCRYPT_AES_BLOCK_SIZE; + pbDst += SYMCRYPT_AES_BLOCK_SIZE; + cbData -= SYMCRYPT_AES_BLOCK_SIZE; + } + + if( cbData > SYMCRYPT_AES_BLOCK_SIZE ) + { + // Ciphertext stealing encryption + // + // +--------------+ + // | | + // | V + // +-----------------+ | +-----+-----------+ + // | P_m-1 | | | P_m |++++CP+++++| + // +-----------------+ | +-----+-----------+ + // | | | + // enc_m-1 | enc_m + // | | | + // V | V + // +-----+-----------+ | +-----------------+ + // | C_m |++++CP+++++|--+ | C_m-1 | + // +-----+-----------+ 
+-----------------+
+        //    |                    /
+        //    +----------------   / --+
+        //                       /    |
+        //                      |     V
+        // +-----------------+  |  +-----+
+        // |      C_m-1      |<-+  | C_m |
+        // +-----------------+     +-----+
+
+        // Encrypt penultimate plaintext block into buf
+        SymCryptXorBytes( pbTweakBlock, pbSrc, buf, SYMCRYPT_AES_BLOCK_SIZE );
+        (*pBlockCipher->encryptFunc)( pExpandedKey, buf, buf );
+        SymCryptXorBytes( pbTweakBlock, buf, buf, SYMCRYPT_AES_BLOCK_SIZE );
+
+        cbData -= SYMCRYPT_AES_BLOCK_SIZE;
+
+        // Copy buf to buf[SYMCRYPT_AES_BLOCK_SIZE]
+        memcpy( &buf[SYMCRYPT_AES_BLOCK_SIZE], buf, SYMCRYPT_AES_BLOCK_SIZE );
+        // Copy final plaintext bytes to prefix of buf - we must read before writing to support in-place encryption
+        memcpy( buf, pbSrc + SYMCRYPT_AES_BLOCK_SIZE, cbData );
+        // Copy prefix of buf[SYMCRYPT_AES_BLOCK_SIZE] to the right place in the destination buffer
+        memcpy( pbDst + SYMCRYPT_AES_BLOCK_SIZE, &buf[SYMCRYPT_AES_BLOCK_SIZE], cbData );
+
+        // Do final tweak update
+        SymCryptXtsUpdateTweak( pbTweakBlock );
+
+        // Set pbSrc correctly to share code with non-ciphertext stealing case
+        pbSrc = &buf[0];
+    }
+
+    // Final full block encryption
+    SymCryptXorBytes( pbTweakBlock, pbSrc, buf, SYMCRYPT_AES_BLOCK_SIZE );
+    (*pBlockCipher->encryptFunc)( pExpandedKey, buf, buf );
+    SymCryptXorBytes( pbTweakBlock, buf, pbDst, SYMCRYPT_AES_BLOCK_SIZE );
+
+    SymCryptWipeKnownSize( buf, sizeof(buf) );
+}
+// Decrypt one XTS data unit from pbSrc to pbDst: xor with the tweak, decryptFunc, xor again, one block at a time; a trailing partial block uses ciphertext stealing.
+VOID
+SYMCRYPT_CALL
+SymCryptXtsDecryptDataUnit(
+    _In_ PCSYMCRYPT_BLOCKCIPHER pBlockCipher,
+    _In_ PCVOID pExpandedKey,
+    _Inout_updates_( pBlockCipher->blockSize ) PBYTE pbTweakBlock,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData )
+{
+    BYTE buf[2*SYMCRYPT_AES_BLOCK_SIZE];
+    BYTE tweakBuf[SYMCRYPT_AES_BLOCK_SIZE];
+    // Whole blocks are handled here only while at least two blocks remain, so the last one or two blocks fall through to the code below.
+    while( cbData >= 2*SYMCRYPT_AES_BLOCK_SIZE )
+    {
+        SymCryptXorBytes( pbTweakBlock, pbSrc, buf, SYMCRYPT_AES_BLOCK_SIZE );
+        (*pBlockCipher->decryptFunc)( pExpandedKey, buf, buf );
+        SymCryptXorBytes( pbTweakBlock, buf, pbDst, SYMCRYPT_AES_BLOCK_SIZE );
+
+        SYMCRYPT_ASSERT( pBlockCipher->blockSize == SYMCRYPT_AES_BLOCK_SIZE );
+        SymCryptXtsUpdateTweak( pbTweakBlock );
+
+        pbSrc += SYMCRYPT_AES_BLOCK_SIZE;
+        pbDst += SYMCRYPT_AES_BLOCK_SIZE;
+        cbData -= SYMCRYPT_AES_BLOCK_SIZE;
+    }
+
+    if( cbData > SYMCRYPT_AES_BLOCK_SIZE )
+    {
+        // Ciphertext stealing decryption
+        //
+        //                      +--------------+
+        //                      |              |
+        //                      |              V
+        // +-----------------+  |  +-----+-----------+
+        // |      C_m-1      |  |  | C_m |++++CP+++++|
+        // +-----------------+  |  +-----+-----------+
+        //          |           |           |
+        //        dec_m         |        dec_m-1
+        //          |           |           |
+        //          V           |           V
+        // +-----+-----------+  |  +-----------------+
+        // | P_m |++++CP+++++|--+  |      P_m-1      |
+        // +-----+-----------+     +-----------------+
+        //    |                    /
+        //    +----------------   / --+
+        //                       /    |
+        //                      |     V
+        // +-----------------+  |  +-----+
+        // |      P_m-1      |<-+  | P_m |
+        // +-----------------+     +-----+
+
+        // Save penultimate value of tweak to tweakBuf
+        memcpy( tweakBuf, pbTweakBlock, SYMCRYPT_AES_BLOCK_SIZE );
+
+        // Do final tweak update
+        SymCryptXtsUpdateTweak( pbTweakBlock );
+
+        // Decrypt penultimate ciphertext block into buf
+        SymCryptXorBytes( pbTweakBlock, pbSrc, buf, SYMCRYPT_AES_BLOCK_SIZE );
+        (*pBlockCipher->decryptFunc)( pExpandedKey, buf, buf );
+        SymCryptXorBytes( pbTweakBlock, buf, buf, SYMCRYPT_AES_BLOCK_SIZE );
+
+        cbData -= SYMCRYPT_AES_BLOCK_SIZE;
+
+        // Copy buf to buf[SYMCRYPT_AES_BLOCK_SIZE]
+        memcpy( &buf[SYMCRYPT_AES_BLOCK_SIZE], buf, SYMCRYPT_AES_BLOCK_SIZE );
+        // Copy final ciphertext bytes to prefix of buf - we must read before writing to support in-place decryption
+        memcpy( buf, pbSrc + SYMCRYPT_AES_BLOCK_SIZE, cbData );
+        // Copy prefix of buf[SYMCRYPT_AES_BLOCK_SIZE] to the right place in the destination buffer
+        memcpy( pbDst + SYMCRYPT_AES_BLOCK_SIZE, &buf[SYMCRYPT_AES_BLOCK_SIZE], cbData );
+
+        // Set pbSrc and pbTweakBlock correctly to share code with non-ciphertext stealing case
+        pbSrc = &buf[0];
+        pbTweakBlock = &tweakBuf[0];
+    }
+    // NOTE(review): a whole block is always processed below, so cbData >= SYMCRYPT_AES_BLOCK_SIZE is presumably a caller precondition - confirm.
+    // Final full block decryption
+    SymCryptXorBytes( pbTweakBlock, pbSrc, buf, SYMCRYPT_AES_BLOCK_SIZE );
+    (*pBlockCipher->decryptFunc)( pExpandedKey, buf, buf );
+    SymCryptXorBytes( pbTweakBlock, buf, pbDst, SYMCRYPT_AES_BLOCK_SIZE );
+
+    SymCryptWipeKnownSize( buf, sizeof(buf) );
+    SymCryptWipeKnownSize( tweakBuf, sizeof(tweakBuf) );
+}
+// Forwards to the generic SymCryptXtsEncryptDataUnit with SymCryptAesBlockCipherNoOpt as the block cipher.
+VOID
+SYMCRYPT_CALL
+SymCryptXtsAesEncryptDataUnitAsm(
+    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
+    _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData )
+{
+    SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); // keep Prefast happy
+    SymCryptXtsEncryptDataUnit(
+        &SymCryptAesBlockCipherNoOpt,
+        pExpandedKey,
+        pbTweakBlock,
+        pbSrc,
+        pbDst,
+        cbData );
+}
+// Forwards to the generic SymCryptXtsDecryptDataUnit with SymCryptAesBlockCipherNoOpt as the block cipher.
+VOID
+SYMCRYPT_CALL
+SymCryptXtsAesDecryptDataUnitAsm(
+    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
+    _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData )
+{
+    SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); // keep Prefast happy
+    SymCryptXtsDecryptDataUnit(
+        &SymCryptAesBlockCipherNoOpt,
+        pExpandedKey,
+        pbTweakBlock,
+        pbSrc,
+        pbDst,
+        cbData );
+}
+// Plain C variant: forwards to the generic SymCryptXtsEncryptDataUnit with SymCryptAesBlockCipherNoOpt.
+VOID
+SYMCRYPT_CALL
+SymCryptXtsAesEncryptDataUnitC(
+    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
+    _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData )
+{
+    // No special optimizations...
+    SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); // keep Prefast happy
+    SymCryptXtsEncryptDataUnit(
+        &SymCryptAesBlockCipherNoOpt,
+        pExpandedKey,
+        pbTweakBlock,
+        pbSrc,
+        pbDst,
+        cbData );
+}
+// Plain C variant: forwards to the generic SymCryptXtsDecryptDataUnit with SymCryptAesBlockCipherNoOpt.
+VOID
+SYMCRYPT_CALL
+SymCryptXtsAesDecryptDataUnitC(
+    _In_ PCSYMCRYPT_AES_EXPANDED_KEY pExpandedKey,
+    _Inout_updates_( SYMCRYPT_AES_BLOCK_SIZE ) PBYTE pbTweakBlock,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData )
+{
+    SYMCRYPT_ASSERT( SymCryptAesBlockCipherNoOpt.blockSize == SYMCRYPT_AES_BLOCK_SIZE ); // keep Prefast happy
+    SymCryptXtsDecryptDataUnit(
+        &SymCryptAesBlockCipherNoOpt,
+        pExpandedKey,
+        pbTweakBlock,
+        pbSrc,
+        pbDst,
+        cbData );
+
+}
+// Expected ciphertext for SymCryptXtsAesSelftest; compared with memcmp against the encrypted 32-byte buffer.
+static const BYTE SymCryptXtsAesCiphertext[32] = {
+    0xef, 0xe5, 0x8b, 0x1a, 0x0b, 0xaf, 0xc1, 0x08,
+    0xe9, 0xb7, 0x74, 0x1c, 0xcb, 0xdc, 0xf8, 0x53,
+    0x4f, 0x90, 0x55, 0x32, 0x53, 0xf6, 0x18, 0xd2,
+    0x34, 0xd5, 0xf2, 0x29, 0xf6, 0x4f, 0xd3, 0x8c,
+};
+// Known-answer self-test: encrypts and then decrypts a fixed 32-byte buffer in place and calls SymCryptFatal( 'xtsa' ) on any mismatch.
+VOID
+SYMCRYPT_CALL
+SymCryptXtsAesSelftest(void)
+{
+    SYMCRYPT_XTS_AES_EXPANDED_KEY key;
+    BYTE buf[32];
+    BYTE plaintext[sizeof( buf )];
+
+    SymCryptWipeKnownSize( buf, sizeof( buf ) );
+    buf[0] = 1;
+
+    if( SymCryptXtsAesExpandKeyEx( &key, buf, sizeof( buf ), 0 ) != SYMCRYPT_NO_ERROR )
+    {
+        SymCryptFatal( 'xtsa' );
+    }
+
+    SymCryptXtsAesEncrypt( &key, sizeof( buf ), 0, buf, buf, sizeof( buf ) );
+
+    SymCryptInjectError( buf, sizeof( buf ) );
+    if( memcmp( buf, SymCryptXtsAesCiphertext, sizeof( buf ) ) != 0 )
+    {
+        SymCryptFatal( 'xtsa' );
+    }
+
+    SymCryptXtsAesDecrypt( &key, sizeof( buf ), 0, buf, buf, sizeof( buf ) );
+
+    SymCryptInjectError( buf, sizeof( buf ) );
+
+    SymCryptWipeKnownSize( plaintext, sizeof( plaintext ) );
+    plaintext[0] = 1;
+    if( memcmp( buf, plaintext, sizeof( buf ) ) != 0 )
+    {
+        SymCryptFatal( 'xtsa' );
+    }
+}
diff --git a/libs/symcrypt/lib/xtsaes_definitions.h b/libs/symcrypt/lib/xtsaes_definitions.h
new file mode 100644
index 00000000000..0a316b65a1b
---
/dev/null
+++ b/libs/symcrypt/lib/xtsaes_definitions.h
@@ -0,0 +1,176 @@
+//
+// xtsaes_definitions.h
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+//
+// Multiply by alpha
+//
+// <</>> indicate shifts on 128-bit values
+// <<<</>>>> indicate shifts on 32-bit values (a word)
+//
+
+// Multiply by ALPHA
+// Since there's no instruction to shift the 128 bit register left by one, the following shifts do the trick.
+// All shifts are zero extended
+//   t1 = _in <<<< 1                        words shifted left by 1, this is almost a _in << 1 but there are
+//                                          gaps at first bit of each word, the following two shifts fix that.
+//   t2 = _in >>>> 31                       words shifted right by 31
+//   t1 = t1 ^ (t2 << 32)                   t1 = _in << 1, note ^ could be |
+// Do the special case for first byte of _in where last carry means xor with 135 for first byte.
+//   t2 = t2 >> 96                          t2 = _in >> 127, i.e., last bit of _in is placed in first bit
+//   t2 = (t2 <<<< 7) + (t2 <<<<3) - (t2)   t2 = 135 if last bit of t2 is set
+//   res = t1 ^ t2
+#define XTS_MUL_ALPHA_old( _in, _res ) \
+{\
+    __m128i _t1, _t2;\
+\
+    _t1 = _mm_slli_epi32( _in, 1 ); \
+    _t2 = _mm_srli_epi32( _in, 31); \
+    _t1 = _mm_xor_si128( _t1, _mm_slli_si128( _t2, 4 )); \
+    _t2 = _mm_srli_si128( _t2, 12 ); \
+    _t2 = _mm_sub_epi32( _mm_add_epi32( _mm_slli_epi32( _t2, 7 ), _mm_slli_epi32( _t2, 3 ) ), _t2 ); \
+    _res = _mm_xor_si128( _t1, _t2 ); \
+}
+
+// An improved approach; use arithmetic shift-right to duplicate the carry-out, PSHUFD to re-arrange, and an AND to
+// implement both the polynomial and mask the other words down to 1 bit again.
+
+// __m128i XTS_ALPHA_MASK = _mm_set_epi32( 1, 1, 1, 0x87 );
+#define XTS_MUL_ALPHA( _in, _res ) \
+{\
+    __m128i _t1, _t2;\
+\
+    _t1 = _mm_slli_epi32( _in, 1 ); \
+    _t2 = _mm_srai_epi32( _in, 31); \
+    _t2 = _mm_shuffle_epi32( _t2, _MM_SHUFFLE( 2, 1, 0, 3 ) ); \
+    _t2 = _mm_and_si128( _t2, XTS_ALPHA_MASK ); \
+    _res = _mm_xor_si128( _t1, _t2 ); \
+}
+
+// Like XTS_MUL_ALPHA_old but operate on __m512i for _in and _res.
+// TODO: do this with VSHUFPS.
+#define XTS_MUL_ALPHA_ZMM_old( _in, _res ) \
+{\
+    __m512i _t1, _t2;\
+\
+    _t1 = _mm512_slli_epi32( _in, 1 ); \
+    _t2 = _mm512_srli_epi32( _in, 31); \
+    _t1 = _mm512_xor_si512( _t1, _mm512_bslli_epi128( _t2, 4 )); \
+    _t2 = _mm512_bsrli_epi128( _t2, 12 ); \
+    _t2 = _mm512_sub_epi32( _mm512_add_epi32( _mm512_slli_epi32( _t2, 7 ), _mm512_slli_epi32( _t2, 3 ) ), _t2 ); \
+    _res = _mm512_xor_si512( _t1, _t2 ); \
+}
+
+// Multiply by ALPHA^2
+//   t1 = Input <<<< 2
+//   t2 = Input >>>> 30
+//   t1 = t1 ^ (t2 << 32)
+//   t2 = t2 >> 96
+//   t2 = (t2 <<<< 7) ^ (t2 <<<< 2) ^ (t2 <<<< 1) ^ t2
+//   res = t1 ^ t2
+#define XTS_MUL_ALPHA2( _in, _res ) \
+{\
+    __m128i _t1, _t2;\
+\
+    _t1 = _mm_slli_epi32( _in, 2 ); \
+    _t2 = _mm_srli_epi32( _in, 30); \
+    _t1 = _mm_xor_si128( _t1, _mm_slli_si128( _t2, 4 )); \
+    _t2 = _mm_srli_si128( _t2, 12 ); \
+    _t2 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_slli_epi32( _t2, 7 ), _mm_slli_epi32( _t2, 2 ) ), _mm_slli_epi32( _t2, 1 )), _t2 ); \
+    _res = _mm_xor_si128( _t1, _t2 ); \
+}
+
+// Multiply by ALPHA^4
+//   t1 = Input <<<< 4
+//   t2 = Input >>>> 28
+//   t1 = t1 ^ (t2 << 32)
+//   t2 = t2 >> 96
+//   t2 = (t2 <<<< 7) ^ (t2 <<<< 2) ^ (t2 <<<< 1) ^ t2
+//   res = t1 ^ t2
+#define XTS_MUL_ALPHA4( _in, _res ) \
+{\
+    __m128i _t1, _t2;\
+\
+    _t1 = _mm_slli_epi32( _in, 4 ); \
+    _t2 = _mm_srli_epi32( _in, 28); \
+    _t1 = _mm_xor_si128( _t1, _mm_slli_si128( _t2, 4 )); \
+    _t2 = _mm_srli_si128( _t2, 12 ); \
+    _t2 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_slli_epi32( _t2, 7 ), _mm_slli_epi32( _t2, 2 ) ), _mm_slli_epi32( _t2, 1 )), _t2 ); \
+    _res = _mm_xor_si128( _t1, _t2 ); \
+}
+// Multiply by ALPHA^5: same steps as XTS_MUL_ALPHA4 with word shift counts 5 and 27.
+#define XTS_MUL_ALPHA5( _in, _res ) \
+{\
+    __m128i _t1, _t2;\
+\
+    _t1 = _mm_slli_epi32( _in, 5 ); \
+    _t2 = _mm_srli_epi32( _in, 27); \
+    _t1 = _mm_xor_si128( _t1, _mm_slli_si128( _t2, 4 )); \
+    _t2 = _mm_srli_si128( _t2, 12 ); \
+    _t2 = _mm_xor_si128( _mm_xor_si128( _mm_xor_si128( _mm_slli_epi32( _t2, 7 ), _mm_slli_epi32( _t2, 2 ) ), _mm_slli_epi32( _t2, 1 )), _t2 ); \
+    _res = _mm_xor_si128( _t1, _t2 ); \
+}
+
+
+// Multiply by ALPHA^8
+//   t2 = Input >> 120
+//   t2 = (t2 <<<< 7) ^ (t2 <<<< 2) ^ (t2 <<<< 1) ^ t2
+//   res = (Input << 8) ^ t2
+//
+// Only currently used with VPCLMULQDQ (in Ymm / Zmm versions) as non-vectorized PCLMULQDQ is not always supported with AESNI,
+// and is sometimes slower than shift+xor
+
+// __m256i XTS_ALPHA_MULTIPLIER_Ymm = _mm256_set_epi64x( 0, 0x87, 0, 0x87);
+#define XTS_MUL_ALPHA8_YMM( _in, _res ) \
+{\
+    __m256i _t2;\
+\
+    _t2 = _mm256_srli_si256( _in, 15 ); /* AVX2 */ \
+    _res = _mm256_slli_si256( _in, 1 ); \
+    _t2 = _mm256_clmulepi64_epi128( _t2, XTS_ALPHA_MULTIPLIER_Ymm, 0x00 ); \
+    _res = _mm256_xor_si256( _res, _t2 ); \
+}
+// Multiply by ALPHA^16: same steps as XTS_MUL_ALPHA8_YMM with byte shift counts 14 and 2.
+#define XTS_MUL_ALPHA16_YMM( _in, _res ) \
+{\
+    __m256i _t2;\
+\
+    _t2 = _mm256_srli_si256( _in, 14 ); /* AVX2 */ \
+    _res = _mm256_slli_si256( _in, 2 ); \
+    _t2 = _mm256_clmulepi64_epi128( _t2, XTS_ALPHA_MULTIPLIER_Ymm, 0x00 ); \
+    _res = _mm256_xor_si256( _res, _t2 ); \
+}
+
+// __m512i XTS_ALPHA_MULTIPLIER_Zmm = _mm512_set_epi64( 0, 0x87, 0, 0x87, 0, 0x87, 0, 0x87 );
+#define XTS_MUL_ALPHA8_ZMM( _in, _res ) \
+{\
+    __m512i _t2; \
+\
+    _t2 = _mm512_bsrli_epi128( _in, 15 ); \
+    _res = _mm512_bslli_epi128( _in, 1 ); \
+    _t2 = _mm512_clmulepi64_epi128( _t2, XTS_ALPHA_MULTIPLIER_Zmm, 0x00 ); \
+    _res = _mm512_xor_si512( _res, _t2 ); \
+}
+// Multiply by ALPHA^16: same steps as XTS_MUL_ALPHA8_ZMM with byte shift counts 14 and 2.
+#define XTS_MUL_ALPHA16_ZMM( _in, _res ) \
+{\
+    __m512i _t2; \
+\
+    _t2 = _mm512_bsrli_epi128( _in, 14 ); \
+    _res = _mm512_bslli_epi128( _in, 2 ); \
+    _t2 = _mm512_clmulepi64_epi128( _t2, XTS_ALPHA_MULTIPLIER_Zmm, 0x00 ); \
+    _res = _mm512_xor_si512( _res, _t2 ); \
+}
+
+// Currently only use UINT64 for x86 and amd64 - this does regress perf on x86
+// but we don't expect a lot of XTS in x86. If the regression causes any real problems
+// we can consider introducing another variant. Not doing this now to avoid code bloat
+#define XTS_MUL_ALPHA_Scalar( _inout_low_u64, _inout_high_u64 ) \
+{ \
+    UINT64 tmp = (UINT64) ((INT64)_inout_high_u64 >> 63); \
+    \
+    _inout_high_u64 = (_inout_high_u64 << 1) ^ (_inout_low_u64 >> 63); \
+    _inout_low_u64 = (_inout_low_u64 << 1) ^ (tmp & 0x87); \
+}
diff --git a/libs/symcrypt/lib/xtsaes_pattern.c b/libs/symcrypt/lib/xtsaes_pattern.c
new file mode 100644
index 00000000000..f6199cd5d55
--- /dev/null
+++ b/libs/symcrypt/lib/xtsaes_pattern.c
@@ -0,0 +1,90 @@
+//
+// xtsaes_pattern.c
+//
+// Copyright (c) Microsoft Corporation. Licensed under the MIT license.
+//
+
+#if 0
+#pragma makedep header
+#endif
+// Pattern body: SYMCRYPT_XtsAesXxx, SYMCRYPT_AesEcbEncryptXxx, N_PARALLEL_TWEAKS and the SYMCRYPT_XTS* macros are not defined here - presumably supplied by the including file (confirm).
+VOID
+SYMCRYPT_CALL
+SYMCRYPT_XtsAesXxx(
+    _In_ PCSYMCRYPT_XTS_AES_EXPANDED_KEY pExpandedKey,
+    SIZE_T cbDataUnit,
+    _In_reads_( SYMCRYPT_AES_BLOCK_SIZE ) PCBYTE pbTweak,
+    _In_reads_( cbData ) PCBYTE pbSrc,
+    _Out_writes_( cbData ) PBYTE pbDst,
+    SIZE_T cbData,
+    BOOLEAN bOverflow )
+{
+    SYMCRYPT_XTS_AES_LOCALSCRATCH_DEFN;
+    //      SYMCRYPT_ALIGN BYTE localScratch[N_PARALLEL_TWEAKS * SYMCRYPT_AES_BLOCK_SIZE];
+    // or
+    //      /* Defining localScratch as a buffer of __m128is ensures there is required 16B alignment on x86 */
+    //      __m128i localScratch[ N_PARALLEL_TWEAKS + 16 ];
+    // Note that the extra 16 __m128i space is used for internal scratch space for SymCryptXtsAesEncryptDataUnitXmm
+    // This allows modified tweak generation to be performed in scalar registers in parallel with AES in Xmm register
+    // which reduces register pressure and increases throughput
+    PBYTE tweakBuf = (PBYTE) &localScratch[0];
+    SIZE_T i, tweakBytes;
+    UINT64 tweakLow64 = SYMCRYPT_LOAD_LSBFIRST64(pbTweak);
+    UINT64 tweakHigh64 = SYMCRYPT_LOAD_LSBFIRST64(pbTweak+8);
+    UINT64 previousTweakLow64;
+
+    SYMCRYPT_ASSERT( cbData % cbDataUnit == 0 );
+
+    while( cbData >= cbDataUnit )
+    {
+        //
+        // We encrypt the tweaks of many data units in parallel for best performance.
+        // In the first loop we build the tweaks and decrement cbData.
+        // In the second loop we use up all the tweaks, and update the pointers.
+        // Both loops are executed the same number of times.
+        //
+        tweakBytes = 0;
+        previousTweakLow64 = tweakLow64;
+
+        do // do-while because we know we are going to go through at least once.
+        {
+            SYMCRYPT_STORE_LSBFIRST64(&tweakBuf[tweakBytes    ], tweakLow64);
+            SYMCRYPT_STORE_LSBFIRST64(&tweakBuf[tweakBytes + 8], tweakHigh64);
+            tweakLow64++;
+            cbData -= cbDataUnit;
+            tweakBytes += SYMCRYPT_AES_BLOCK_SIZE;
+        } while( cbData >= cbDataUnit && tweakBytes < SYMCRYPT_AES_BLOCK_SIZE * N_PARALLEL_TWEAKS );
+
+        if( bOverflow && previousTweakLow64 > tweakLow64 )
+        {
+            // Very rare fix-up of tweaks if tweakLow64 overflowed, and should have incremented tweakHigh64
+            // bOverflow=FALSE allows backwards compatibility with old API which wrapped around at 64-bits
+            SYMCRYPT_ASSERT( tweakLow64 < N_PARALLEL_TWEAKS );
+
+            // Increment tweakHigh64 and store new value in high half of the previous tweakLow64 tweaks
+            tweakHigh64++;
+            for( i=0; i<tweakLow64; i++)
+            {
+                SYMCRYPT_STORE_LSBFIRST64(&tweakBuf[tweakBytes - (16*i) - 8], tweakHigh64);
+            }
+        }
+
+        SYMCRYPT_AesEcbEncryptXxx( &pExpandedKey->key2, &tweakBuf[0], &tweakBuf[0], tweakBytes );
+
+        i = 0;
+        while( i < tweakBytes )
+        {
+            SYMCRYPT_XTSAESDATAUNIT_INVOKE;
+            //      SymCryptXtsAesXxcryptDataUnitXxx( &pExpandedKey->key1, &tweakBuf[i], pbSrc, pbDst, cbDataUnit );
+            // or
+            //      SymCryptXtsAesXxcryptDataUnitXxx( &pExpandedKey->key1, &tweakBuf[i], (PBYTE) &localScratch[N_PARALLEL_TWEAKS], pbSrc, pbDst, cbDataUnit );
+            // Note that the scratch space being provided to the DataUnit function is an offset into the localScratch buffer
+
+            pbSrc += cbDataUnit;
+            pbDst += cbDataUnit;
+            i += SYMCRYPT_AES_BLOCK_SIZE;
+        }
+    }
+
+    SymCryptWipeKnownSize( localScratch, sizeof( localScratch ) );
+}
diff --git a/tools/make_makefiles b/tools/make_makefiles
index a6d0d6b210c..194940d8aac 100755
--- a/tools/make_makefiles
+++ b/tools/make_makefiles
@@ -303,6 +303,11 @@ sub assign_sources_to_makefiles(@)
             {
                 next;
             }
+            else
+            {
+                my %flags = get_makedep_flags($file);
+                next if defined $flags{header};
+            }
             push @{${$make}{"=SOURCES"}}, $name;
         }
 
diff --git a/tools/makedep.c b/tools/makedep.c
index 90a522640fe..ca61fb6a9dc 100644
--- a/tools/makedep.c
+++ b/tools/makedep.c
@@ -3748,6 +3748,7 @@ static const struct
     { "l", output_source_l },
     { "h", output_source_h },
     { "rh", output_source_h },
+    { "inc", output_source_h },
     { "inl", output_source_h },
     { "ver", output_source_ver },
     { "rc", output_source_rc },
-- 
GitLab
https://gitlab.winehq.org/wine/wine/-/merge_requests/11025